easy_ml/linear_algebra.rs

1/*!
2 * Linear algebra algorithms on numbers and matrices
3 *
4 * Note that many of these functions are also exposed as corresponding methods on the Matrix
5 * and Tensor types, but in-depth documentation is only presented here.
6 *
7 * It is recommended to favor the corresponding methods on the Matrix and Tensor types, as the
8 * Rust compiler can get confused by the generics on these functions if you use
9 * them without turbofish syntax.
10 *
11 * Nearly all of these functions are generic over [Numeric](super::numeric) types.
12 * Unfortunately, when using these functions the compiler may get confused about what
13 * type `T` should be, and you will get the error:
14 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
15 *
16 * In this case you need to manually specify the type of T by using the
17 * turbofish syntax like:
18 * `linear_algebra::inverse::<f32>(&matrix)`
19 *
20 * If you are working with a generic type `T`, specify that type directly:
21 * `linear_algebra::inverse::<T>(&matrix)`
22 *
23 * ## Generics
24 *
25 * For the tensor variants of these functions, the generics allow very flexible input types.
26 *
27 * A function like
28 * ```ignore
29 * pub fn inverse_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>> where
30 *    T: Numeric,
31 *    for<'a> &'a T: NumericRef<T>,
32 *    I: Into<TensorView<T, S, 2>>,
33 *    S: TensorRef<T, 2>,
34 * ```
35 * means it takes any type that can be converted to a TensorView, which includes Tensor, &Tensor,
36 * and &mut Tensor, as well as references to a TensorView.
37 */
38
39use crate::matrices::{Column, Matrix, Row};
40use crate::numeric::extra::{Real, RealRef, Sqrt};
41use crate::numeric::{Numeric, NumericRef};
42use crate::tensors::views::{TensorRef, TensorView};
43use crate::tensors::{Dimension, Tensor};
44
45/**
46 * Computes the inverse of a matrix provided that it exists. To have an inverse
47 * a matrix must be square (same number of rows and columns) and it must also
48 * have a non zero determinant.
49 *
50 * The inverse of a matrix `A` is the matrix `A^-1` which when multiplied by `A`
51 * in either order yields the identity matrix `I`.
52 *
53 * `A(A^-1) == (A^-1)A == I`.
54 *
55 * The inverse is like the reciprocal of a number, except for matrices instead of scalars.
56 * With scalars, there is no inverse for `0` because `1 / 0` is not defined. Similarly
57 * to compute the inverse of a matrix we divide by the determinant, so matrices
58 * with a determinant of 0 have no inverse, even if they are square.
59 *
60 * This algorithm performs the analytic solution described by
61 * [wikipedia](https://en.wikipedia.org/wiki/Invertible_matrix#Analytic_solution)
62 * and should compute the inverse for any size of square matrix if it exists, but
63 * is inefficient for large matrices.
64 *
65 * # Warning
66 *
67 * With some uses of this function the Rust compiler gets confused about what type `T`
68 * should be and you will get the error:
69 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
70 *
71 * In this case you need to manually specify the type of T by using the
72 * turbofish syntax like:
73 * `linear_algebra::inverse::<f32>(&matrix)`
74 *
75 * Alternatively, the compiler doesn't seem to run into this problem if you
76 * use the equivalent methods on the matrix type like so:
77 * `matrix.inverse()`
78 */
79pub fn inverse<T: Numeric>(matrix: &Matrix<T>) -> Option<Matrix<T>>
80where
81    for<'a> &'a T: NumericRef<T>,
82{
83    if matrix.rows() != matrix.columns() {
84        return None;
85    }
86    // inverse of a 1 x 1 matrix is a special case
87    if matrix.rows() == 1 {
88        // determinant of a 1 x 1 matrix is the single element
89        let element = matrix.scalar();
90        if element == T::zero() {
91            return None;
92        }
93        return Some(Matrix::from_scalar(T::one() / element));
94    }
95
96    // compute the general case for a N x N matrix where N >= 2
97    match determinant::<T>(matrix) {
98        Some(det) => {
99            if det == T::zero() {
100                return None;
101            }
102            let determinant_reciprocal = T::one() / det;
103            let mut cofactor_matrix = Matrix::empty(T::zero(), matrix.size());
104            for i in 0..matrix.rows() {
105                for j in 0..matrix.columns() {
106                    // this should always return Some due to the earlier checks
107                    let ij_minor = minor::<T>(matrix, i, j)?;
108                    // i and j may each be up to the maximum value for usize but
109                    // we only need to know if they are even or odd as
110                    // -1 ^ (i + j) == -1 ^ ((i % 2) + (j % 2))
111                    // by taking modulo of both before adding we ensure there
112                    // is no overflow
113                    let sign = i8::pow(-1, (i.rem_euclid(2) + j.rem_euclid(2)) as u32);
114                    // convert sign into type T
115                    let sign = if sign == 1 {
116                        T::one()
117                    } else {
118                        T::zero() - T::one()
119                    };
120                    // each element of the cofactor matrix is -1^(i+j) * M_ij
121                    // for M_ij equal to the ij minor of the matrix
122                    cofactor_matrix.set(i, j, sign * ij_minor);
123                }
124            }
125            // transposing the cofactor matrix yields the adjugate matrix
126            cofactor_matrix.transpose_mut();
127            // finally to compute the inverse we need to multiply each element by 1 / |A|
128            cofactor_matrix.map_mut(|element| element * determinant_reciprocal.clone());
129            Some(cofactor_matrix)
130        }
131        None => None,
132    }
133}
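
// A minimal usage sketch: assumes the f64 Numeric implementation, and that
// `Matrix::from(Vec<Vec<T>>)` and `Matrix::get(row, column)` behave as in the crate's
// README (neither is defined in this file).
#[cfg(test)]
#[test]
fn inverse_2x2_example() {
    // A = [ 4, 7 ] has determinant 4*6 - 7*2 = 10, so A^-1 = 1/10 * [  6, -7 ]
    //     [ 2, 6 ]                                                  [ -2,  4 ]
    let a = Matrix::from(vec![vec![4.0, 7.0], vec![2.0, 6.0]]);
    let inverted = inverse::<f64>(&a).expect("A is square with a non zero determinant");
    assert!((inverted.get(0, 0) - 0.6).abs() < 1e-10);
    assert!((inverted.get(0, 1) + 0.7).abs() < 1e-10);
    assert!((inverted.get(1, 0) + 0.2).abs() < 1e-10);
    assert!((inverted.get(1, 1) - 0.4).abs() < 1e-10);
}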
134
135/**
136 * Computes the inverse of a matrix provided that it exists. To have an inverse
137 * a matrix must be square (same number of rows and columns) and it must also
138 * have a non zero determinant.
139 *
140 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
141 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
142 * shape, you should reorder the Tensor before calling this function to get the appropriate
143 * matrix.
144 *
145 * The inverse of a matrix `A` is the matrix `A^-1` which when multiplied by `A`
146 * in either order yields the identity matrix `I`.
147 *
148 * `A(A^-1) == (A^-1)A == I`.
149 *
150 * The inverse is like the reciprocal of a number, except for matrices instead of scalars.
151 * With scalars, there is no inverse for `0` because `1 / 0` is not defined. Similarly
152 * to compute the inverse of a matrix we divide by the determinant, so matrices
153 * with a determinant of 0 have no inverse, even if they are square.
154 *
155 * This algorithm performs the analytic solution described by
156 * [wikipedia](https://en.wikipedia.org/wiki/Invertible_matrix#Analytic_solution)
157 * and should compute the inverse for any size of square matrix if it exists, but
158 * is inefficient for large matrices.
159 *
160 * # Warning
161 *
162 * With some uses of this function the Rust compiler gets confused about what type `T`
163 * should be and you will get the error:
164 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
165 *
166 * In this case you need to manually specify the type of T by using the
167 * turbofish syntax like:
168 * `linear_algebra::inverse_tensor::<f32>(&tensor)`
169 *
170 * Alternatively, the compiler doesn't seem to run into this problem if you
171 * use the equivalent methods on the tensor type like so:
172 * `tensor.inverse()`
173 */
174pub fn inverse_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>>
175where
176    T: Numeric,
177    for<'a> &'a T: NumericRef<T>,
178    I: Into<TensorView<T, S, 2>>,
179    S: TensorRef<T, 2>,
180{
181    inverse_less_generic::<T, S>(tensor.into())
182}
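
// A minimal usage sketch for the tensor variant, using the turbofish form suggested
// above; assumes the f64 Numeric implementation.
#[cfg(test)]
#[test]
fn inverse_tensor_2x2_example() {
    // A = [ 1, 2 ] has determinant -2, so A^-1 = [ -2.0,  1.0 ]
    //     [ 3, 4 ]                               [  1.5, -0.5 ]
    let a = Tensor::from([("rows", 2), ("columns", 2)], vec![1.0, 2.0, 3.0, 4.0]);
    let inverted = inverse_tensor::<f64, _, _>(&a).expect("A is square with a non zero determinant");
    let inv = inverted.index();
    assert!((inv.get([0, 0]) + 2.0).abs() < 1e-10);
    assert!((inv.get([0, 1]) - 1.0).abs() < 1e-10);
    assert!((inv.get([1, 0]) - 1.5).abs() < 1e-10);
    assert!((inv.get([1, 1]) + 0.5).abs() < 1e-10);
}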
183
184fn inverse_less_generic<T, S>(tensor: TensorView<T, S, 2>) -> Option<Tensor<T, 2>>
185where
186    T: Numeric,
187    for<'a> &'a T: NumericRef<T>,
188    S: TensorRef<T, 2>,
189{
190    let shape = tensor.shape();
191    if !crate::tensors::dimensions::is_square(&shape) {
192        return None;
193    }
194
195    // inverse of a 1 x 1 matrix is a special case
196    if shape[0].1 == 1 {
197        // determinant of a 1 x 1 matrix is the single element
198        let element = tensor
199            .iter()
200            .next()
201            .expect("1x1 tensor must have a single element");
202        if element == T::zero() {
203            return None;
204        }
205        return Some(Tensor::from(shape, vec![T::one() / element]));
206    }
207
208    // compute the general case for a N x N matrix where N >= 2
209    match determinant_less_generic::<T, _>(&tensor) {
210        Some(det) => {
211            if det == T::zero() {
212                return None;
213            }
214            let determinant_reciprocal = T::one() / det;
215            let mut cofactor_matrix = Tensor::empty(shape, T::zero());
216            for ([i, j], x) in cofactor_matrix.iter_reference_mut().with_index() {
217                // this should always return Some due to the earlier checks
218                let ij_minor = minor_tensor::<T, _>(&tensor, i, j)?;
219                // i and j may each be up to the maximum value for usize but
220                // we only need to know if they are even or odd as
221                // -1 ^ (i + j) == -1 ^ ((i % 2) + (j % 2))
222                // by taking modulo of both before adding we ensure there
223                // is no overflow
224                let sign = i8::pow(-1, (i.rem_euclid(2) + j.rem_euclid(2)) as u32);
225                // convert sign into type T
226                let sign = if sign == 1 {
227                    T::one()
228                } else {
229                    T::zero() - T::one()
230                };
231                // each element of the cofactor matrix is -1^(i+j) * M_ij
232                // for M_ij equal to the ij minor of the matrix
233                *x = sign * ij_minor;
234            }
235            // transposing the cofactor matrix yields the adjugate matrix
236            cofactor_matrix.transpose_mut([shape[1].0, shape[0].0]);
237            // finally to compute the inverse we need to multiply each element by 1 / |A|
238            cofactor_matrix.map_mut(|element| element * determinant_reciprocal.clone());
239            Some(cofactor_matrix)
240        }
241        None => None,
242    }
243}
244
245// TODO: expose these minor methods and test them directly
246// https://www.teachoo.com/9780/1204/Minor-and-Cofactor-of-a-determinant/category/Finding-Minors-and-cofactors/
247
248/*
249 * Computes the (i,j) minor of a matrix by copying it. This is the
250 * determinant of the matrix after deleting the ith row and the jth column.
251 *
252 * Minors can only be taken on matrices which have a determinant and rows and
253 * columns to remove. Hence for non square matrices or 1 x 1 matrices this returns
254 * None.
255 */
256fn minor<T: Numeric>(matrix: &Matrix<T>, i: Row, j: Column) -> Option<T>
257where
258    for<'a> &'a T: NumericRef<T>,
259{
260    minor_mut::<T>(&mut matrix.clone(), i, j)
261}
262
263/**
264 * Computes the (i,j) minor of a matrix by modifying it in place. This is
265 * the determinant of the matrix after deleting the ith row and the jth column.
266 *
267 * Minors can only be taken on matrices which have a determinant and rows and
268 * columns to remove. Hence for non square matrices or 1 x 1 matrices this returns
269 * None and does not modify the matrix.
270 */
271fn minor_mut<T: Numeric>(matrix: &mut Matrix<T>, i: Row, j: Column) -> Option<T>
272where
273    for<'a> &'a T: NumericRef<T>,
274{
275    if matrix.rows() == 1 || matrix.columns() == 1 {
276        // nothing to delete
277        return None;
278    }
279    if matrix.rows() != matrix.columns() {
280        // no determinant
281        return None;
282    }
283    matrix.remove_row(i);
284    matrix.remove_column(j);
285    determinant::<T>(matrix)
286}
287
288fn minor_tensor<T, S>(tensor: &TensorView<T, S, 2>, i: usize, j: usize) -> Option<T>
289where
290    T: Numeric,
291    for<'a> &'a T: NumericRef<T>,
292    S: TensorRef<T, 2>,
293{
294    use crate::tensors::views::{IndexRange, TensorMask};
295    let shape = tensor.shape();
296    if shape[0].1 == 1 || shape[1].1 == 1 {
297        // nothing to delete
298        return None;
299    }
300    if !crate::tensors::dimensions::is_square(&shape) {
301        // no determinant
302        return None;
303    }
304    let minored = TensorView::from(
305        TensorMask::from_all(
306            tensor.source_ref(),
307            [Some(IndexRange::new(i, 1)), Some(IndexRange::new(j, 1))],
308        )
309        .expect("Having just checked tensor is at least 2x2 we should be able to take a mask"),
310    );
311    determinant_less_generic::<T, _>(&minored)
312}
313
314/**
315 * Computes the determinant of a square matrix. For a 2 x 2 matrix this is given by
316 * `ad - bc` for:
317 * ```ignore
318 * [
319 *   a, b
320 *   c, d
321 * ]
322 * ```
323 *
324 * This function will return the determinant only if it exists. Non square matrices
325 * do not have a determinant. A determinant is a scalar value computed from the
326 * elements of a square matrix and often corresponds to matrices with special properties.
327 *
328 * Note that the determinant of a 1 x 1 matrix is just the element in the matrix.
329 *
330 * This function computes the determinant using the same type as that of the Matrix,
331 * hence if the input type is unsigned (such as Wrapping&lt;u8&gt;) the value computed
332 * is likely to not make any sense because a determinant may be negative.
333 *
334 * [https://en.wikipedia.org/wiki/Determinant](https://en.wikipedia.org/wiki/Determinant)
335 *
336 * # Warning
337 *
338 * With some uses of this function the Rust compiler gets confused about what type `T`
339 * should be and you will get the error:
340 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
341 *
342 * In this case you need to manually specify the type of T by using the
343 * turbofish syntax like:
344 * `linear_algebra::determinant::<f32>(&matrix)`
345 *
346 * Alternatively, the compiler doesn't seem to run into this problem if you
347 * use the equivalent methods on the matrix type like so:
348 * [`matrix.determinant()`](Matrix::determinant)
349 */
350pub fn determinant<T: Numeric>(matrix: &Matrix<T>) -> Option<T>
351where
352    for<'a> &'a T: NumericRef<T>,
353{
354    if matrix.rows() != matrix.columns() {
355        return None;
356    }
357    let length = matrix.rows();
358
359    match length {
360        0 => return None,
361        1 => return Some(matrix.scalar()),
362        _ => (),
363    };
364
365    determinant_less_generic::<T, _>(&TensorView::from(
366        crate::interop::TensorRefMatrix::from(matrix).ok()?,
367    ))
368}
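
// A minimal sketch of the 2 x 2 `ad - bc` case described above; assumes the f64 Numeric
// implementation and that `Matrix::from(Vec<Vec<T>>)` behaves as in the crate's README.
#[cfg(test)]
#[test]
fn determinant_2x2_example() {
    let a = Matrix::from(vec![vec![1.0, 2.0], vec![3.0, 4.0]]);
    // ad - bc = 1*4 - 2*3 = -2
    assert_eq!(determinant::<f64>(&a), Some(-2.0));
    // a non square matrix has no determinant
    let b = Matrix::from(vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0]]);
    assert_eq!(determinant::<f64>(&b), None);
}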
369
370/**
371 * Computes the determinant of a square matrix. For a 2 x 2 matrix this is given by
372 * `ad - bc` for:
373 * ```ignore
374 * [
375 *   a, b
376 *   c, d
377 * ]
378 * ```
379 *
380 * This function will return the determinant only if it exists. Non square matrices
381 * do not have a determinant. A determinant is a scalar value computed from the
382 * elements of a square matrix and often corresponds to matrices with special properties.
383 *
384 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
385 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
386 * shape, you should reorder the Tensor before calling this function to get the appropriate
387 * matrix.
388 *
389 * Note that the determinant of a 1 x 1 matrix is just the element in the matrix.
390 *
391 * This function computes the determinant using the same type as that of the Tensor,
392 * hence if the input type is unsigned (such as Wrapping&lt;u8&gt;) the value computed
393 * is likely to not make any sense because a determinant may be negative.
394 *
395 * [https://en.wikipedia.org/wiki/Determinant](https://en.wikipedia.org/wiki/Determinant)
396 *
397 * # Warning
398 *
399 * With some uses of this function the Rust compiler gets confused about what type `T`
400 * should be and you will get the error:
401 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
402 *
403 * In this case you need to manually specify the type of T by using the
404 * turbofish syntax like:
405 * `linear_algebra::determinant_tensor::<f32, _, _>(&tensor)`
406 *
407 * Alternatively, the compiler doesn't seem to run into this problem if you
408 * use the equivalent methods on the tensor type like so:
409 * [`tensor.determinant()`](Tensor::determinant)
410 */
411pub fn determinant_tensor<T, S, I>(tensor: I) -> Option<T>
412where
413    T: Numeric,
414    for<'a> &'a T: NumericRef<T>,
415    I: Into<TensorView<T, S, 2>>,
416    S: TensorRef<T, 2>,
417{
418    determinant_less_generic::<T, S>(&tensor.into())
419}
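
// The tensor equivalent of the sketch above; assumes the f64 Numeric implementation.
#[cfg(test)]
#[test]
fn determinant_tensor_2x2_example() {
    let a = Tensor::from([("rows", 2), ("columns", 2)], vec![1.0, 2.0, 3.0, 4.0]);
    // ad - bc = 1*4 - 2*3 = -2
    assert_eq!(determinant_tensor::<f64, _, _>(&a), Some(-2.0));
}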
420
421fn determinant_less_generic<T, S>(tensor: &TensorView<T, S, 2>) -> Option<T>
422where
423    T: Numeric,
424    for<'a> &'a T: NumericRef<T>,
425    S: TensorRef<T, 2>,
426{
427    let shape = tensor.shape();
428    if !crate::tensors::dimensions::is_square(&shape) {
429        return None;
430    }
431    let length = shape[0].1;
432
433    if length == 0 {
434        return None;
435    }
436
437    let matrix = tensor.index();
438
439    if length == 1 {
440        return Some(matrix.get([0, 0]));
441    }
442
443    // compute the general case for the determinant of an N x N matrix with
444    // N >= 2
445
446    let mut sum = T::zero();
447
448    // iterate through all permutations of the numbers in the range from 0 to N - 1
449    // which we will use for indexing
450    with_each_permutation(&mut (0..length).collect(), &mut |permutation, even_swap| {
451        // Compute the signature for this permutation, such that we
452        // have +1 for an even number and -1 for an odd number of swaps
453        let signature = if even_swap {
454            T::one()
455        } else {
456            T::zero() - T::one()
457        };
458        let mut product = T::one();
459        for (n, i) in permutation.iter().enumerate() {
460            // Get the element at the index corresponding to n and the n'th
461            // element in the permutation list.
462            let element = matrix.get_ref([n, *i]);
463            product = product * element;
464        }
465        // copying the sum prevents a move that would stop us from returning it.
466        // This still massively reduces the number of copies compared to using
467        // generate_permutations, which avoids copying the sum but would instead
468        // require copying the permutation list N! times.
469        sum = sum.clone() + (signature * product);
470    });
471
472    Some(sum)
473}
474
475/*
476 * Computes the factorial of a number.
477 * eg for an input of 5 computes 1 * 2 * 3 * 4 * 5
478 * which is equal to 120
479 */
480#[allow(dead_code)] // used in testing
481fn factorial(n: usize) -> usize {
482    (1..=n).product()
483}
484
485/**
486 * Performs repeated swaps on the provided mutable reference to a list, swapping
487 * exactly 1 pair each time before calling the consumer, as defined by Heap's Algorithm:
488 * <https://en.wikipedia.org/wiki/Heap%27s_algorithm>
489 */
490fn heaps_permutations<T: Clone, F>(k: usize, list: &mut Vec<T>, consumer: &mut F)
491where
492    F: FnMut(&mut Vec<T>),
493{
494    if k == 1 {
495        consumer(list);
496        return;
497    }
498
499    for i in 0..k {
500        heaps_permutations(k - 1, list, consumer);
501        // avoid redundant swaps
502        if i < k - 1 {
503            // Swap on the even/oddness of k
504            if k % 2 == 0 {
505                // if k is even swap final and the index
506                list.swap(i, k - 1);
507            } else {
508                // if k is odd swap final and first
509                list.swap(0, k - 1);
510            }
511        }
512    }
513}
514
515/**
516 * Generates a list of all possible permutations of a list, with each
517 * sublist one swap different from the last and correspondingly alternating
518 * in even and odd swaps required to obtain the reordering.
519 */
520#[allow(dead_code)] // used in testing
521fn generate_permutations<T: Clone>(list: &mut Vec<T>) -> Vec<(Vec<T>, bool)> {
522    let mut permutations = Vec::with_capacity(factorial(list.len()));
523    let mut even_swaps = true;
524    heaps_permutations(list.len(), list, &mut |permuted| {
525        permutations.push((permuted.clone(), even_swaps));
526        even_swaps = !even_swaps;
527    });
528    permutations
529}
530
531/*
532 * In place version of generate_permutations which calls the consumer on
533 * each permuted list without performing any copies (ie each permuted list
534 * is the same list before and after permutation).
535 */
536fn with_each_permutation<T: Clone, F>(list: &mut Vec<T>, consumer: &mut F)
537where
538    F: FnMut(&mut Vec<T>, bool),
539{
540    let mut even_swaps = true;
541    heaps_permutations(list.len(), list, &mut |permuted| {
542        consumer(permuted, even_swaps);
543        even_swaps = !even_swaps;
544    });
545}
546
547#[cfg(test)]
548#[test]
549fn test_permutations() {
550    // Exhaustively test permutation even/oddness for an input
551    // of length 3
552    let mut list = vec![1, 2, 3];
553    let permutations = generate_permutations(&mut list);
554    assert!(permutations.contains(&(vec![1, 2, 3], true)));
555    assert!(permutations.contains(&(vec![3, 2, 1], false)));
556    assert!(permutations.contains(&(vec![2, 3, 1], true)));
557    assert!(permutations.contains(&(vec![1, 3, 2], false)));
558    assert!(permutations.contains(&(vec![2, 1, 3], false)));
559    assert!(permutations.contains(&(vec![3, 1, 2], true)));
560    assert_eq!(permutations.len(), 6);
561
562    // Test a larger input non exhaustively to make sure it
563    // generalises.
564    let mut list = vec![1, 2, 3, 4, 5];
565    let permuted = generate_permutations(&mut list);
566    assert!(permuted.contains(&(vec![1, 2, 3, 4, 5], true)));
567    assert!(permuted.contains(&(vec![1, 2, 3, 5, 4], false)));
568    assert!(permuted.contains(&(vec![1, 2, 5, 3, 4], true)));
569
570    // Test a length 2 input as well
571    let mut list = vec![0, 1];
572    let permuted = generate_permutations(&mut list);
573    assert!(permuted.contains(&(vec![0, 1], true)));
574    assert!(permuted.contains(&(vec![1, 0], false)));
575    assert_eq!(permuted.len(), 2);
576}
577
578/**
579 * Computes the covariance matrix for an NxM feature matrix, in which
580 * each of the N rows has M features to find the covariance and variance of.
581 *
582 * The covariance matrix is a matrix of how each feature varies with itself
583 * (along the diagonal) and all the other features (symmetrically above and below
584 * the diagonal).
585 *
586 * Each element in the covariance matrix at (i, j) will be the covariance of the
587 * ith and jth features from the feature matrix, defined as the zero-meaned
588 * dot product of the two feature vectors divided by the number of samples.
589 *
590 * If all the features in the input have a variance of one then the covariance matrix
591 * returned by this function will be equivalent to the correlation matrix of the input.
592 *
593 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
594 *
595 * # Panics
596 *
597 * If the numeric type is unable to represent the number of samples
598 * for each feature (ie if `T` is `i8` and you have 1000 samples) then this function
599 * will panic.
600 *
601 * # Warning
602 *
603 * With some uses of this function the Rust compiler gets confused about what type `T`
604 * should be and you will get the error:
605 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
606 *
607 * In this case you need to manually specify the type of T by using the
608 * turbofish syntax like:
609 * `linear_algebra::covariance_column_features::<f32>(&matrix)`
610 *
611 * Alternatively, the compiler doesn't seem to run into this problem if you
612 * use the equivalent methods on the matrix type like so:
613 * `matrix.covariance_column_features()`
614 */
615pub fn covariance_column_features<T: Numeric>(matrix: &Matrix<T>) -> Matrix<T>
616where
617    for<'a> &'a T: NumericRef<T>,
618{
619    let features = matrix.columns();
620    let samples = T::from_usize(matrix.rows())
621        .expect("The maximum value of the matrix type T cannot represent this many samples");
622    let mut covariance_matrix = Matrix::empty(T::zero(), (features, features));
623    covariance_matrix.map_mut_with_index(|_, i, j| {
624        // set each element of the covariance matrix to the variance
625        // of features i and j
626        let feature_i_mean: T = matrix.column_iter(i).sum::<T>() / &samples;
627        let feature_j_mean: T = matrix.column_iter(j).sum::<T>() / &samples;
628        matrix
629            .column_reference_iter(i)
630            .map(|x| x - &feature_i_mean)
631            .zip(matrix.column_reference_iter(j).map(|y| y - &feature_j_mean))
632            .map(|(x, y)| x * y)
633            .sum::<T>()
634            / &samples
635    });
636    covariance_matrix
637}
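
// A minimal sketch on a tiny dataset; assumes the f64 Numeric implementation and that
// `Matrix::from(Vec<Vec<T>>)` and `Matrix::get(row, column)` behave as in the crate's README.
#[cfg(test)]
#[test]
fn covariance_column_features_example() {
    // 3 samples (rows) of 2 features (columns), where the second feature is exactly
    // twice the first, so the two features have positive covariance.
    let data = Matrix::from(vec![vec![1.0, 2.0], vec![2.0, 4.0], vec![3.0, 6.0]]);
    let covariance = covariance_column_features::<f64>(&data);
    // variance of the first feature: ((1-2)^2 + (2-2)^2 + (3-2)^2) / 3 = 2/3
    assert!((covariance.get(0, 0) - 2.0 / 3.0).abs() < 1e-10);
    // the covariance matrix is symmetric, and here the off diagonal entries are positive
    assert!((covariance.get(0, 1) - covariance.get(1, 0)).abs() < 1e-10);
    assert!(covariance.get(0, 1) > 0.0);
}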
638
639/**
640 * Computes the covariance matrix for an NxM feature matrix, in which
641 * each of the M columns has N features to find the covariance and variance of.
642 *
643 * The covariance matrix is a matrix of how each feature varies with itself
644 * (along the diagonal) and all the other features (symmetrically above and below
645 * the diagonal).
646 *
647 * Each element in the covariance matrix at (i, j) will be the covariance of the
648 * ith and jth features from the feature matrix, defined as the zero-meaned
649 * dot product of the two feature vectors divided by the number of samples.
650 *
651 * If all the features in the input have a variance of one then the covariance matrix
652 * returned by this function will be equivalent to the correlation matrix of the input.
653 *
654 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
655 *
656 * # Panics
657 *
658 * If the numeric type is unable to represent the number of samples
659 * for each feature (ie if `T` is `i8` and you have 1000 samples) then this function
660 * will panic.
661 *
662 * # Warning
663 *
664 * With some uses of this function the Rust compiler gets confused about what type `T`
665 * should be and you will get the error:
666 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
667 *
668 * In this case you need to manually specify the type of T by using the
669 * turbofish syntax like:
670 * `linear_algebra::covariance_row_features::<f32>(&matrix)`
671 *
672 * Alternatively, the compiler doesn't seem to run into this problem if you
673 * use the equivalent methods on the matrix type like so:
674 * `matrix.covariance_row_features()`
675 */
676pub fn covariance_row_features<T: Numeric>(matrix: &Matrix<T>) -> Matrix<T>
677where
678    for<'a> &'a T: NumericRef<T>,
679{
680    let features = matrix.rows();
681    let samples = T::from_usize(matrix.columns())
682        .expect("The maximum value of the matrix type T cannot represent this many samples");
683    let mut covariance_matrix = Matrix::empty(T::zero(), (features, features));
684    covariance_matrix.map_mut_with_index(|_, i, j| {
685        // set each element of the covariance matrix to the variance
686        // of features i and j
687        let feature_i_mean: T = matrix.row_iter(i).sum::<T>() / &samples;
688        let feature_j_mean: T = matrix.row_iter(j).sum::<T>() / &samples;
689        matrix
690            .row_reference_iter(i)
691            .map(|x| x - &feature_i_mean)
692            .zip(matrix.row_reference_iter(j).map(|y| y - &feature_j_mean))
693            .map(|(x, y)| x * y)
694            .sum::<T>()
695            / &samples
696    });
697    covariance_matrix
698}
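
// The row features counterpart of the sketch above: the same data transposed, so the
// same covariance matrix is expected; the same assumptions apply.
#[cfg(test)]
#[test]
fn covariance_row_features_example() {
    // 2 features (rows) of 3 samples (columns)
    let data = Matrix::from(vec![vec![1.0, 2.0, 3.0], vec![2.0, 4.0, 6.0]]);
    let covariance = covariance_row_features::<f64>(&data);
    assert!((covariance.get(0, 0) - 2.0 / 3.0).abs() < 1e-10);
    assert!((covariance.get(0, 1) - covariance.get(1, 0)).abs() < 1e-10);
}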
699
700/**
701 * Computes the covariance matrix for a 2 dimensional Tensor feature matrix.
702 *
703 * The `feature_dimension` specifies which dimension holds the features. The other dimension
704 * is assumed to hold the samples. For a Tensor with a `feature_dimension` of length N, and
705 * the other dimension of length M, returns an NxN covariance matrix with a shape of
706 * `[("i", N), ("j", N)]`.
707 *
708 * Each element in the covariance matrix at (i, j) will be the covariance of the ith and jth
709 * features from the feature matrix, defined as the zero-meaned dot product of the two
710 * feature vectors divided by the number of samples (M).
711 *
712 * If all the features in the input have a variance of one then the covariance matrix
713 * returned by this function will be equivalent to the correlation matrix of the input.
714 *
715 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
716 *
717 * # Panics
718 *
719 * - If the numeric type is unable to represent the number of samples for each feature
720 *   (ie if `T` is `i8` and you have 1000 samples)
721 * - If the provided feature_dimension is not a dimension in the tensor
722 *
723 * # Warning
724 *
725 * With some uses of this function the Rust compiler gets confused about what type `T`
726 * should be and you will get the error:
727 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
728 *
729 * In this case you need to manually specify the type of T by using the
730 * turbofish syntax like:
731 * `linear_algebra::covariance::<f32, _, _>(&matrix)`
732 *
733 * Alternatively, the compiler doesn't seem to run into this problem if you
734 * use the equivalent methods on the tensor type like so:
735 * `tensor.covariance("features")`
736 *
737 * # Example
738 *
739 * ```
740 * use easy_ml::tensors::Tensor;
741 * let matrix = Tensor::from([("samples", 5), ("features", 3)], vec![
742 * //  X     Y    Z
743 *     1.0,  0.0, 0.5,
744 *     1.2, -1.0, 0.4,
745 *     1.8, -1.2, 0.7,
746 *     0.9,  0.1, 0.3,
747 *     0.7,  0.5, 0.6
748 * ]);
749 * let covariance_matrix = matrix.covariance("features");
750 * let (x, y, z) = (0, 1, 2);
751 * let x_y_z = covariance_matrix.index();
752 * // the variance of each feature with itself is positive
753 * assert!(x_y_z.get([x, x]) > 0.0);
754 * assert!(x_y_z.get([y, y]) > 0.0);
755 * assert!(x_y_z.get([z, z]) > 0.0);
756 * // first feature X and second feature Y have negative covariance (as X goes up Y goes down)
757 * assert!(x_y_z.get([x, y]) < 0.0);
758 * println!("{}", covariance_matrix);
759 * // D = 2
760 * // ("i", 3), ("j", 3)
761 * // [ 0.142, -0.226, 0.026
762 * //   -0.226, 0.438, -0.022
763 * //   0.026, -0.022, 0.020 ]
764 * ```
765 */
766#[track_caller]
767pub fn covariance<T, S, I>(tensor: I, feature_dimension: Dimension) -> Tensor<T, 2>
768where
769    T: Numeric,
770    for<'a> &'a T: NumericRef<T>,
771    I: Into<TensorView<T, S, 2>>,
772    S: TensorRef<T, 2>,
773{
774    covariance_less_generic::<T, S>(tensor.into(), feature_dimension)
775}
776
777#[track_caller]
778fn covariance_less_generic<T, S>(
779    tensor: TensorView<T, S, 2>,
780    feature_dimension: Dimension,
781) -> Tensor<T, 2>
782where
783    T: Numeric,
784    for<'a> &'a T: NumericRef<T>,
785    S: TensorRef<T, 2>,
786{
787    let shape = tensor.shape();
788    let features_index = {
789        if shape[0].0 == feature_dimension {
790            0
791        } else if shape[1].0 == feature_dimension {
792            1
793        } else {
794            panic!(
795                "Feature dimension {:?} is not present in the input tensor's shape: {:?}",
796                feature_dimension, shape
797            );
798        }
799    };
800    let (feature_dimension, features) = shape[features_index];
801    let (_sample_dimension, samples) = shape[1 - features_index];
802    let samples = T::from_usize(samples)
803        .expect("The maximum value of the matrix type T cannot represent this many samples");
804    let mut covariance_matrix = Tensor::empty([("i", features), ("j", features)], T::zero());
805    covariance_matrix.map_mut_with_index(|[i, j], _| {
806        // set each element of the covariance matrix to the variance of features i and j
807        #[rustfmt::skip]
808        let feature_i_mean: T = tensor
809            .select([(feature_dimension, i)])
810            .iter()
811            .sum::<T>() / &samples;
812        #[rustfmt::skip]
813        let feature_j_mean: T = tensor
814            .select([(feature_dimension, j)])
815            .iter()
816            .sum::<T>() / &samples;
817        tensor
818            .select([(feature_dimension, i)])
819            .iter_reference()
820            .map(|x| x - &feature_i_mean)
821            .zip(
822                tensor
823                    .select([(feature_dimension, j)])
824                    .iter_reference()
825                    .map(|y| y - &feature_j_mean),
826            )
827            .map(|(x, y)| x * y)
828            .sum::<T>()
829            / &samples
830    });
831    covariance_matrix
832}
833
834/**
835 * Computes the mean of the values in an iterator, consuming the iterator.
836 *
837 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
838 *
839 * # Panics
840 *
841 * If the iterator is empty. This function will also fail if the length of the iterator
842 * or sum of all the values in the iterator exceeds the maximum number the type can
843 * represent.
844 */
845pub fn mean<I, T: Numeric>(mut data: I) -> T
846where
847    I: Iterator<Item = T>,
848{
849    let mut next = data.next();
850    assert!(next.is_some(), "Provided iterator must not be empty");
851    let mut count = T::zero();
852    let mut sum = T::zero();
853    while next.is_some() {
854        count = count + T::one();
855        sum = sum + next.unwrap();
856        next = data.next();
857    }
858    sum / count
859}
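
// A minimal sketch of mean, assuming the f64 Numeric implementation.
#[cfg(test)]
#[test]
fn mean_example() {
    // (1 + 2 + 3 + 4) / 4 = 2.5
    assert_eq!(mean(vec![1.0, 2.0, 3.0, 4.0].into_iter()), 2.5);
}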
860
861/**
862 * Computes the variance of the values in an iterator, consuming the iterator.
863 *
864 * Variance is defined as the expected value of the squares of the zero mean data.
865 * It captures how much data varies from its mean, ie the spread of the data.
866 *
867 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
868 *
869 * Variance may also be computed as the mean of each squared datapoint minus the
870 * square of the mean of the data. Although this method would allow for a streaming
871 * implementation the [wikipedia page](https://en.wikipedia.org/wiki/Variance#Definition)
872 * cautions: "This equation should not be used for computations using floating point
873 * arithmetic because it suffers from catastrophic cancellation if the two components
874 * of the equation are similar in magnitude".
875 *
876 * # Panics
877 *
878 * If the iterator is empty. This function will also fail if the length of the iterator
879 * or sum of all the values in the iterator exceeds the maximum number the type can
880 * represent.
881 */
882pub fn variance<I, T: Numeric>(data: I) -> T
883where
884    I: Iterator<Item = T>,
885{
886    let list = data.collect::<Vec<T>>();
887    assert!(!list.is_empty(), "Provided iterator must not be empty");
888
889    // copy the list as we need to keep it as well as getting the mean
890    let m = mean(list.iter().cloned());
891    // use into_iter here as we no longer need to keep the list
892    mean(
893        list.into_iter()
894            .map(|x| (x.clone() - m.clone()) * (x - m.clone())),
895    )
896}
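
// A minimal sketch of variance, assuming the f64 Numeric implementation.
#[cfg(test)]
#[test]
fn variance_example() {
    // the mean is 2.5, the squared deviations are 2.25, 0.25, 0.25 and 2.25,
    // so the variance is 5.0 / 4 = 1.25
    let v: f64 = variance(vec![1.0, 2.0, 3.0, 4.0].into_iter());
    assert!((v - 1.25).abs() < 1e-10);
}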
897
898/**
899 * Computes the softmax of the values in an iterator, consuming the iterator.
900 *
901 * softmax(z)\[i\] = e<sup>z<sub>i</sub></sup> / the sum of e<sup>z<sub>j</sub></sup> for all j
902 * where z is a list of elements
903 *
904 * Softmax normalises an input of numbers into a probability distribution, such
905 * that they will sum to 1. This is often used to turn a neural network's
906 * raw outputs into class probabilities.
907 *
908 * The implementation shifts the inputs by the maximum value in the iterator,
909 * to make numerical overflow less of a problem. As per the definition of softmax,
910 * softmax(z) = softmax(z-max(z)).
911 *
912 * [Further information](https://en.wikipedia.org/wiki/Softmax_function)
913 *
914 * # Panics
915 *
916 * If the iterator contains NaN values, or any value for which PartialOrd fails.
917 *
918 * This function will also fail if the length of the iterator or sum of all the values
919 * in the iterator exceeds the maximum or minimum number the type can represent.
920 */
921pub fn softmax<I, T: Real>(data: I) -> Vec<T>
922where
923    I: Iterator<Item = T>,
924{
925    let list = data.collect::<Vec<T>>();
926    if list.is_empty() {
927        return Vec::with_capacity(0);
928    }
929    let max = list
930        .iter()
931        .max_by(|a, b| a.partial_cmp(b).expect("NaN should not be in list"))
932        .unwrap();
933
934    let denominator: T = list.iter().cloned().map(|x| (x - max).exp()).sum();
935    list.iter()
936        .cloned()
937        .map(|x| (x - max).exp() / denominator.clone())
938        .collect()
939}
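
// A minimal sketch of softmax, assuming the f64 Real implementation.
#[cfg(test)]
#[test]
fn softmax_example() {
    let probabilities = softmax(vec![1.0_f64, 2.0, 3.0].into_iter());
    // the outputs form a probability distribution that sums to 1
    let sum: f64 = probabilities.iter().cloned().sum();
    assert!((sum - 1.0).abs() < 1e-10);
    // larger inputs map to larger probabilities
    assert!(probabilities[2] > probabilities[1] && probabilities[1] > probabilities[0]);
}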
940
941/**
942 * Computes the F-1 score of the Precision and Recall
943 *
944 * 2 * (precision * recall) / (precision + recall)
945 *
946 * # [F-1 score](https://en.wikipedia.org/wiki/F1_score)
947 *
948 * This is a harmonic mean of the two, which penalises the score
949 * more heavily if either the precision or recall are poor than
950 * an arithmetic mean.
951 *
952 * The F-1 score is a helpful metric for assessing classifiers, as
953 * it takes into account that classes may be heavily imbalanced, which
954 * accuracy does not. For example, it may be quite easy to create a
955 * 95% accurate test for a medical condition, which intuitively seems
956 * very good, but if 99.9% of patients are expected to not have the
957 * condition then accuracy is a poor way to measure performance because
958 * it does not consider that the cost of false negatives is very high.
959 *
960 * Note that Precision and Recall both depend on there being a positive
961 * and negative class for a classification task; in some contexts this may
962 * be an arbitrary choice.
963 *
964 * # [Precision](https://en.wikipedia.org/wiki/Precision_and_recall)
965 *
966 * In classification, precision is true positives / positive predictions.
967 * It measures correct identifications of the positive class compared
968 * to all predictions of the positive class. You can trivially get
969 * 100% precision by never predicting the positive class, as this can
970 * never result in a false positive.
971 *
972 * Note that the meaning of precision in classification or document
973 * retrieval is not the same as its meaning in [measurements](https://en.wikipedia.org/wiki/Accuracy_and_precision).
974 *
975 * # [Recall](https://en.wikipedia.org/wiki/Precision_and_recall)
976 *
977 * In classification, recall is true positives / actual positives.
978 * It measures how many of the positive cases are identified. You
979 * can trivially get 100% recall by always predicting the positive class,
980 * as this can never result in a false negative.
981 *
982 * # [F scores](https://en.wikipedia.org/wiki/F1_score)
983 *
984 * The F-1 score is an evenly weighted combination of Precision and
985 * Recall. For domains where the cost of false positives and false
986 * negatives are not equal, you should use a biased F score that weights
987 * precision or recall more strongly than the other.
988 */
989pub fn f1_score<T: Numeric>(precision: T, recall: T) -> T {
990    (T::one() + T::one()) * ((precision.clone() * recall.clone()) / (precision + recall))
991}
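
// A minimal sketch of f1_score, assuming the f64 Numeric implementation.
#[cfg(test)]
#[test]
fn f1_score_example() {
    // 2 * (0.5 * 1.0) / (0.5 + 1.0) = 2/3
    let score = f1_score(0.5_f64, 1.0);
    assert!((score - 2.0 / 3.0).abs() < 1e-10);
}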
992
993/**
994 * Computes the cholesky decomposition of a matrix. This yields a matrix `L`
995 * such that for the provided matrix `A`, `L * L^T = A`. `L` will always be
996 * lower triangular, ie all entries above the diagonal will be 0. Hence cholesky
997 * decomposition can be interpreted as a generalised square root function.
998 *
999 * Cholesky decomposition is defined for
1000 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix),
1001 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix) matrices.
1002 *
1003 * This function does not check that the provided matrix is symmetric. However, given a symmetric
1004 * input, if the input is not positive definite `None` is returned. Attempting a cholesky
1005 * decomposition is also an efficient way to check if such a matrix is positive definite.
1006 * In the future additional checks that the input is valid could be added.
1007 */
1008pub fn cholesky_decomposition<T: Numeric + Sqrt<Output = T>>(
1009    matrix: &Matrix<T>,
1010) -> Option<Matrix<T>>
1011where
1012    for<'a> &'a T: NumericRef<T>,
1013{
1014    let lower_triangular = cholesky_decomposition_less_generic::<T, _>(&TensorView::from(
1015        crate::interop::TensorRefMatrix::from(matrix).ok()?,
1016    ))?;
1017    Some(lower_triangular.into_matrix())
1018}
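
// A minimal sketch on a small symmetric positive definite matrix; assumes the f64
// implementations of Numeric and Sqrt and that `Matrix::from(Vec<Vec<T>>)` and
// `Matrix::get(row, column)` behave as in the crate's README.
#[cfg(test)]
#[test]
fn cholesky_decomposition_example() {
    // A = [ 4, 2 ] factors as L = [ 2, 0       ] since L * L^T = A
    //     [ 2, 3 ]                [ 1, sqrt(2) ]
    let a = Matrix::from(vec![vec![4.0, 2.0], vec![2.0, 3.0]]);
    let l = cholesky_decomposition::<f64>(&a).expect("A is symmetric positive definite");
    assert!((l.get(0, 0) - 2.0).abs() < 1e-10);
    assert!((l.get(0, 1) - 0.0).abs() < 1e-10);
    assert!((l.get(1, 0) - 1.0).abs() < 1e-10);
    assert!((l.get(1, 1) - 2.0_f64.sqrt()).abs() < 1e-10);
}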
1019
1020/**
1021 * Computes the cholesky decomposition of a Tensor matrix. This yields a matrix `L`
1022 * such that for the provided matrix `A`, `L * L^T = A`. `L` will always be
1023 * lower triangular, ie all entries above the diagonal will be 0. Hence cholesky
1024 * decomposition can be interpreted as a generalised square root function.
1025 *
1026 * Cholesky decomposition is defined for
1027 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix),
1028 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix) matrices.
1029 *
1030 * This function does not check that the provided matrix is symmetric. However, given a symmetric
1031 * input, if the input is not positive definite `None` is returned. Attempting a cholesky
1032 * decomposition is also an efficient way to check if such a matrix is positive definite.
1033 * In the future additional checks that the input is valid could be added.
1034 *
1035 * The output matrix will have the same shape as the input.
1036 */
1037pub fn cholesky_decomposition_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>>
1038where
1039    T: Numeric + Sqrt<Output = T>,
1040    for<'a> &'a T: NumericRef<T>,
1041    I: Into<TensorView<T, S, 2>>,
1042    S: TensorRef<T, 2>,
1043{
1044    cholesky_decomposition_less_generic::<T, S>(&tensor.into())
1045}
1046
1047#[rustfmt::skip]
1048fn cholesky_decomposition_less_generic<T, S>(tensor: &TensorView<T, S, 2>) -> Option<Tensor<T, 2>>
1049where
1050    T: Numeric + Sqrt<Output = T>,
1051    for<'a> &'a T: NumericRef<T>,
1052    S: TensorRef<T, 2>,
1053{
1054    let shape = tensor.shape();
1055    if !crate::tensors::dimensions::is_square(&shape) {
1056        return None;
1057    }
1058    // The computation steps are outlined nicely at https://rosettacode.org/wiki/Cholesky_decomposition
1059    let mut lower_triangular = Tensor::empty(shape, T::zero());
1060    let mut lower_triangular_indexing = lower_triangular.index_mut();
1061    let tensor_indexing = tensor.index();
1062    let n = shape[0].1;
1063
1064    for i in 0..n {
1065        // For each column j we need to compute all i, j entries
1066        // before incrementing j further as the diagonals depend
1067        // on the elements below the diagonal of the previous columns,
1068        // and the elements below the diagonal depend on the diagonal
1069        // of their column and elements below the diagonal up to that
1070        // column.
1071
1072        for j in 0..=i {
1073            // For the i = j case we compute the sum of squares, otherwise we're
1074            // computing a sum of L_ik * L_jk using the current column and prior columns
1075            let sum = {
1076                let mut sum = T::zero();
1077                for k in 0..j {
1078                    sum = &sum
1079                        + (lower_triangular_indexing.get_ref([i, k])
1080                            * lower_triangular_indexing.get_ref([j, k]));
1081                }
1082                sum
1083            };
1084            // Calculate L_ij as we step through the lower diagonal
1085            *lower_triangular_indexing.get_ref_mut([i, j]) = if i == j {
1086                let entry_squared = tensor_indexing.get_ref([i, j]) - sum;
1087                if entry_squared <= T::zero() {
1088                    // input wasn't positive definite! avoid sqrt of a negative number.
1089                    // We can take sqrt(0) but that will leave a 0 on the diagonal which
1090                    // will then cause division by zero for the j < i case later.
1091                    return None;
1092                }
1093                entry_squared.sqrt()
1094            } else /* j < i */ {
1095                (tensor_indexing.get_ref([i, j]) - sum)
1096                    * (T::one() / lower_triangular_indexing.get_ref([j, j]))
1097            };
1098        }
1099    }
1100    Some(lower_triangular)
1101}
1102
1103/**
1104 * The result of an `LDL^T` Decomposition of some matrix `A` such that `LDL^T = A`.
1105 */
1106#[derive(Clone, Debug)]
1107#[non_exhaustive]
1108pub struct LDLTDecomposition<T> {
1109    pub l: Matrix<T>,
1110    pub d: Matrix<T>,
1111}
1112
1113impl<T: std::fmt::Display + Clone> std::fmt::Display for LDLTDecomposition<T> {
1114    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1115        writeln!(f, "L:\n{}", &self.l)?;
1116        write!(f, "D:\n{}", &self.d)
1117    }
1118}
1119
1120impl<T> LDLTDecomposition<T> {
1121    /**
1122     * Creates an `LDL^T` Decomposition struct from two matrices without checking that `LDL^T = A`
1123     * or that L and D have the intended properties.
1124     *
1125     * This is provided for assistance with unit testing.
1126     */
1127    pub fn from_unchecked(l: Matrix<T>, d: Matrix<T>) -> LDLTDecomposition<T> {
1128        LDLTDecomposition { l, d }
1129    }
1130}
1131
1132/**
1133 * Computes the LDL^T decomposition of a matrix. This yields a matrix `L` and a matrix `D`
1134 * such that for the provided matrix `A`, `L * D * L^T = A`. `L` will always be
1135 * unit lower triangular, ie all entries above the diagonal will be 0, and all entries along
1136 * the diagonal will be 1. `D` will always contain zeros except along the diagonal. This
1137 * decomposition is closely related to the [cholesky decomposition](cholesky_decomposition)
1138 * with the notable difference that it avoids taking square roots.
1139 *
1140 * Similarly to the cholesky decomposition, the input matrix must be
1141 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix) and
1142 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix).
1143 *
1144 * This function does not check that the provided matrix is symmetric. However, given a symmetric
1145 * input, if the input is only positive **semi**definite `None` is returned. In the future
1146 * additional checks that the input is valid could be added.
1147 *
1148 * # Warning
1149 *
1150 * With some uses of this function the Rust compiler gets confused about what type `T`
1151 * should be and you will get the error:
1152 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
1153 *
1154 * In this case you need to manually specify the type of T by using the
1155 * turbofish syntax like:
1156 * `linear_algebra::ldlt_decomposition::<f32>(&matrix)`
1157 */
1158pub fn ldlt_decomposition<T>(matrix: &Matrix<T>) -> Option<LDLTDecomposition<T>>
1159where
1160    T: Numeric,
1161    for<'a> &'a T: NumericRef<T>,
1162{
1163    let decomposition = ldlt_decomposition_less_generic::<T, _>(&TensorView::from(
1164        crate::interop::TensorRefMatrix::from(matrix).ok()?,
1165    ))?;
1166    Some(LDLTDecomposition {
1167        l: decomposition.l.into_matrix(),
1168        d: decomposition.d.into_matrix(),
1169    })
1170}
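
// A minimal sketch on the same matrix as the cholesky example above; assumes the f64
// Numeric implementation.
#[cfg(test)]
#[test]
fn ldlt_decomposition_example() {
    // A = [ 4, 2 ] factors as L = [ 1,   0 ], D = [ 4, 0 ] since L * D * L^T = A
    //     [ 2, 3 ]                [ 0.5, 1 ]      [ 0, 2 ]
    let a = Matrix::from(vec![vec![4.0, 2.0], vec![2.0, 3.0]]);
    let decomposition = ldlt_decomposition::<f64>(&a).expect("A is symmetric positive definite");
    assert!((decomposition.l.get(1, 0) - 0.5).abs() < 1e-10);
    assert!((decomposition.d.get(0, 0) - 4.0).abs() < 1e-10);
    assert!((decomposition.d.get(1, 1) - 2.0).abs() < 1e-10);
}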
1171
1172/**
1173 * The result of an `LDL^T` Decomposition of some matrix `A` such that `LDL^T = A`.
1174 */
1175#[derive(Clone, Debug)]
1176#[non_exhaustive]
1177pub struct LDLTDecompositionTensor<T> {
1178    pub l: Tensor<T, 2>,
1179    pub d: Tensor<T, 2>,
1180}
1181
1182impl<T: std::fmt::Display + Clone> std::fmt::Display for LDLTDecompositionTensor<T> {
1183    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1184        writeln!(f, "L:\n{}", &self.l)?;
1185        write!(f, "D:\n{}", &self.d)
1186    }
1187}
1188
1189impl<T> LDLTDecompositionTensor<T> {
1190    /**
1191     * Creates an `LDL^T` Decomposition struct from two matrices without checking that `LDL^T = A`
1192     * or that L and D have the intended properties.
1193     *
1194     * This is provided for assistance with unit testing.
1195     */
1196    pub fn from_unchecked(l: Tensor<T, 2>, d: Tensor<T, 2>) -> LDLTDecompositionTensor<T> {
1197        LDLTDecompositionTensor { l, d }
1198    }
1199}
1200
1201/**
1202 * Computes the LDL^T decomposition of a matrix. This yields a matrix `L` and a matrix `D`
1203 * such that for the provided matrix `A`, `L * D * L^T = A`. `L` will always be
1204 * unit lower triangular, ie all entries above the diagonal will be 0, and all entries along
1205 * the diagonal will be 1. `D` will always contain zeros except along the diagonal. This
1206 * decomposition is closely related to the [cholesky decomposition](cholesky_decomposition)
1207 * with the notable difference that it avoids taking square roots.
1208 *
1209 * Similarly to the cholesky decomposition, the input matrix must be
1210 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix) and
1211 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix).
1212 *
1213 * This function does not check that the provided matrix is symmetric. However, given a symmetric
1214 * input, if the input is only positive **semi**definite `None` is returned. In the future
1215 * additional checks that the input is valid could be added.
1216 *
1217 * The shapes of the `L` and `D` matrices will be the same as the input matrix.
1218 *
1219 * # Warning
1220 *
1221 * With some uses of this function the Rust compiler gets confused about what type `T`
1222 * should be and you will get the error:
1223 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
1224 *
1225 * In this case you need to manually specify the type of T by using the
1226 * turbofish syntax like:
1227 * `linear_algebra::ldlt_decomposition_tensor::<f32, _, _>(&tensor)`
1228 */
1229pub fn ldlt_decomposition_tensor<T, S, I>(tensor: I) -> Option<LDLTDecompositionTensor<T>>
1230where
1231    T: Numeric,
1232    for<'a> &'a T: NumericRef<T>,
1233    I: Into<TensorView<T, S, 2>>,
1234    S: TensorRef<T, 2>,
1235{
1236    ldlt_decomposition_less_generic::<T, S>(&tensor.into())
1237}
1238
1239fn ldlt_decomposition_less_generic<T, S>(
1240    tensor: &TensorView<T, S, 2>,
1241) -> Option<LDLTDecompositionTensor<T>>
1242where
1243    T: Numeric,
1244    for<'a> &'a T: NumericRef<T>,
1245    S: TensorRef<T, 2>,
1246{
1247    // The algorithm is outlined nicely in context as Algorithm 1.2 here:
1248    // https://mcsweeney90.github.io/files/modified-cholesky-decomposition-and-applications.pdf
1249    // and also as proper code here (though a less efficient solution):
1250    // https://astroanddata.blogspot.com/2020/04/ldl-decomposition-with-python.html
1251    let shape = tensor.shape();
1252    if !crate::tensors::dimensions::is_square(&shape) {
1253        return None;
1254    }
1255    let mut lower_triangular = Tensor::empty(shape, T::zero());
1256    let mut diagonal = Tensor::empty(shape, T::zero());
1257    let mut lower_triangular_indexing = lower_triangular.index_mut();
1258    let mut diagonal_indexing = diagonal.index_mut();
1259    let tensor_indexing = tensor.index();
1260    let n = shape[0].1;
1261
1262    for j in 0..n {
1263        #[rustfmt::skip]
1264        let sum = {
1265            let mut sum = T::zero();
1266            for k in 0..j {
1267                sum = &sum + (
1268                    lower_triangular_indexing.get_ref([j, k]) *
1269                    lower_triangular_indexing.get_ref([j, k]) *
1270                    diagonal_indexing.get_ref([k, k])
1271                );
1272            }
1273            sum
1274        };
1275        *diagonal_indexing.get_ref_mut([j, j]) = {
1276            let entry = tensor_indexing.get_ref([j, j]) - sum;
1277            if entry == T::zero() {
1278                // If input is positive definite then no diagonal will be 0. Otherwise we
1279                // fail the decomposition to avoid division by zero in the j < i case later.
1280                // Note: unlike cholesky, negatives here are fine since we can still perform
1281                // the calculations sensibly.
1282                return None;
1283            }
1284            entry
1285        };
1286        for i in j..n {
1287            #[rustfmt::skip]
1288            let x = if i == j {
1289                T::one()
1290            } else /* j < i */ {
1291                let sum = {
1292                    let mut sum = T::zero();
1293                    for k in 0..j {
1294                        sum = &sum + (
1295                            lower_triangular_indexing.get_ref([i, k]) *
1296                            lower_triangular_indexing.get_ref([j, k]) *
1297                            diagonal_indexing.get_ref([k, k])
1298                        );
1299                    }
1300                    sum
1301                };
1302                (tensor_indexing.get_ref([i, j]) - sum) *
1303                    (T::one() / diagonal_indexing.get_ref([j, j]))
1304            };
1305            *lower_triangular_indexing.get_ref_mut([i, j]) = x;
1306        }
1307    }
1308    Some(LDLTDecompositionTensor {
1309        l: lower_triangular,
1310        d: diagonal,
1311    })
1312}
1313
1314/**
1315 * The result of a QR Decomposition of some matrix A such that `QR = A`.
1316 */
1317#[derive(Clone, Debug)]
1318#[non_exhaustive]
1319pub struct QRDecomposition<T> {
1320    pub q: Matrix<T>,
1321    pub r: Matrix<T>,
1322}
1323
1324impl<T: std::fmt::Display + Clone> std::fmt::Display for QRDecomposition<T> {
1325    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1326        writeln!(f, "Q:\n{}", &self.q)?;
1327        write!(f, "R:\n{}", &self.r)
1328    }
1329}
1330
1331impl<T> QRDecomposition<T> {
1332    /**
1333     * Creates a QR Decomposition struct from two matrices without checking that `QR = A`
1334     * or that Q and R have the intended properties.
1335     *
1336     * This is provided for assistance with unit testing.
1337     */
1338    pub fn from_unchecked(q: Matrix<T>, r: Matrix<T>) -> QRDecomposition<T> {
1339        QRDecomposition { q, r }
1340    }
1341}
1342
1343fn householder_matrix_tensor<T: Real>(vector: Vec<T>, names: [Dimension; 2]) -> Tensor<T, 2>
1344where
1345    for<'a> &'a T: RealRef<T>,
1346{
1347    // The computation steps are outlined nicely at https://en.wikipedia.org/wiki/QR_decomposition#Using_Householder_reflections
1348    // Supporting reference implementations are on Rosettacode https://rosettacode.org/wiki/QR_decomposition
1349    // we hardcode to taking the first column vector of the input matrix
1350    let x = Tensor::from([("r", vector.len())], vector);
1351    let shape = x.shape();
1352    let rows = shape[0].1;
1353    let length = x.euclidean_length();
1354    let a = {
1355        // we hardcode to wanting to zero all elements below the first
1356        let sign = x.first();
1357        if sign > T::zero() { length } else { -length }
1358    };
1359    let u = {
1360        // u = x - ae, where e is [1 0 0 0 ... 0]^T, and x is the column vector so
1361        // u is equal to x except for the first element.
1362        // Also, we invert the sign of a to avoid loss of significance, so u[0] becomes x[0] + a
1363        let mut u = x;
1364        let mut u_indexing = u.index_mut();
1365        *u_indexing.get_ref_mut([0]) = u_indexing.get_ref([0]) + a;
1366        u
1367    };
1368    // v = u / ||u||
1369    let v = {
1370        let length = u.euclidean_length();
1371        u.map(|element| element / &length)
1372    };
1373    let identity = Tensor::diagonal([(names[0], rows), (names[1], rows)], T::one());
1374    let two = T::one() + T::one();
1375    let v_column = Tensor::from([(names[0], rows), (names[1], 1)], v.iter().collect());
1376    let v_row = Tensor::from([(names[0], 1), (names[1], rows)], v.iter().collect());
1377    // I - 2 v v^T
1378    identity - ((v_column * v_row) * two)
1379}
1380
1381/**
1382 * Computes a QR decomposition of a MxN matrix where M >= N.
1383 *
1384 * For an input matrix A, decomposes this matrix into a product of QR, where Q is an
1385 * [orthogonal matrix](https://en.wikipedia.org/wiki/Orthogonal_matrix) and R is an
1386 * upper triangular matrix (all entries below the diagonal are 0), and QR = A.
1387 *
1388 * If the input matrix has more columns than rows, returns None.
1389 *
1390 * # Warning
1391 *
1392 * With some uses of this function the Rust compiler gets confused about what type `T`
1393 * should be and you will get the error:
1394 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::RealByValue<_, _>`
1395 *
1396 * In this case you need to manually specify the type of T by using the
1397 * turbofish syntax like:
1398 * `linear_algebra::qr_decomposition::<f32>(&matrix)`
1399 */
1400pub fn qr_decomposition<T: Real>(matrix: &Matrix<T>) -> Option<QRDecomposition<T>>
1401where
1402    for<'a> &'a T: RealRef<T>,
1403{
1404    let decomposition = qr_decomposition_less_generic::<T, _>(&TensorView::from(
1405        crate::interop::TensorRefMatrix::from(matrix).ok()?,
1406    ))?;
1407    Some(QRDecomposition {
1408        q: decomposition.q.into_matrix(),
1409        r: decomposition.r.into_matrix(),
1410    })
1411}
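
// A minimal sketch using the worked example from the Rosettacode page referenced above;
// assumes the f64 Real implementation, that `Matrix::from(Vec<Vec<T>>)` and
// `Matrix::get(row, column)` behave as in the crate's README, and that matrix
// multiplication is available via `*` on references.
#[cfg(test)]
#[test]
fn qr_decomposition_example() {
    let a = Matrix::from(vec![
        vec![12.0, -51.0, 4.0],
        vec![6.0, 167.0, -68.0],
        vec![-4.0, 24.0, -41.0],
    ]);
    let decomposition = qr_decomposition::<f64>(&a).expect("A has at least as many rows as columns");
    // R is upper triangular: every entry below the diagonal is (approximately) zero
    assert!(decomposition.r.get(1, 0).abs() < 1e-10);
    assert!(decomposition.r.get(2, 0).abs() < 1e-10);
    assert!(decomposition.r.get(2, 1).abs() < 1e-10);
    // Q * R reconstructs A
    let reconstructed = &decomposition.q * &decomposition.r;
    assert!((reconstructed.get(0, 0) - 12.0).abs() < 1e-8);
    assert!((reconstructed.get(2, 2) + 41.0).abs() < 1e-8);
}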
1412
1413/**
1414 * The result of a QR Decomposition of some matrix A such that `QR = A`.
1415 */
1416#[derive(Clone, Debug)]
1417#[non_exhaustive]
1418pub struct QRDecompositionTensor<T> {
1419    pub q: Tensor<T, 2>,
1420    pub r: Tensor<T, 2>,
1421}
1422
1423impl<T: std::fmt::Display + Clone> std::fmt::Display for QRDecompositionTensor<T> {
1424    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1425        writeln!(f, "Q:\n{}", &self.q)?;
1426        write!(f, "R:\n{}", &self.r)
1427    }
1428}
1429
1430impl<T> QRDecompositionTensor<T> {
1431    /**
1432     * Creates a QR Decomposition struct from two matrices without checking that `QR = A`
1433     * or that Q and R have the intended properties.
1434     *
1435     * This is provided for assistance with unit testing.
1436     */
1437    pub fn from_unchecked(q: Tensor<T, 2>, r: Tensor<T, 2>) -> QRDecompositionTensor<T> {
1438        QRDecompositionTensor { q, r }
1439    }
1440}
1441
1442/**
1443 * Computes a QR decomposition of a MxN matrix where M >= N.
1444 *
1445 * For an input matrix A, decomposes this matrix into a product of QR, where Q is an
1446 * [orthogonal matrix](https://en.wikipedia.org/wiki/Orthogonal_matrix) and R is an
1447 * upper triangular matrix (all entries below the diagonal are 0), and QR = A.
1448 *
1449 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
1450 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
1451 * shape, you should reorder the Tensor before calling this function to get the appropriate matrix.
1452 *
1453 * If the input matrix has more columns than rows, returns None.
1454 *
1455 * The shape of R will be the same as the input, and the shape of Q will be of lengths MxM
1456 * in relation to the MxN input matrix with the same dimension names as the input. Hence, QR
1457 * yields the same shape as the input.
1458 *
1459 * # Warning
1460 *
1461 * With some uses of this function the Rust compiler gets confused about what type `T`
1462 * should be and you will get the error:
1463 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::RealByValue<_, _>`
1464 *
1465 * In this case you need to manually specify the type of T by using the
1466 * turbofish syntax like:
1467 * `linear_algebra::qr_decomposition_tensor::<f32, _, _>(&tensor)`
1468 */
1469pub fn qr_decomposition_tensor<T, S, I>(tensor: I) -> Option<QRDecompositionTensor<T>>
1470where
1471    T: Real,
1472    for<'a> &'a T: RealRef<T>,
1473    I: Into<TensorView<T, S, 2>>,
1474    S: TensorRef<T, 2>,
1475{
1476    qr_decomposition_less_generic::<T, S>(&tensor.into())
1477}
1478
1479fn qr_decomposition_less_generic<T, S>(
1480    tensor: &TensorView<T, S, 2>,
1481) -> Option<QRDecompositionTensor<T>>
1482where
1483    T: Real,
1484    for<'a> &'a T: RealRef<T>,
1485    S: TensorRef<T, 2>,
1486{
1487    let shape = tensor.shape();
1488    let names = crate::tensors::dimensions::names_of(&shape);
1489    let rows = shape[0].1;
1490    let columns = shape[1].1;
1491    if columns > rows {
1492        return None;
1493    }
1494    // The computation steps are outlined nicely at https://en.wikipedia.org/wiki/QR_decomposition#Using_Householder_reflections
1495    // Supporting reference implementations are at Rosettacode https://rosettacode.org/wiki/QR_decomposition
1496    let iterations = std::cmp::min(rows - 1, columns);
1497    let mut q = None;
1498    let mut r = tensor.map(|x| x);
1499    for c in 0..iterations {
1500        // Conceptually, on each iteration we take a minor of r to retain the bottom right of
1501        // the matrix, with one fewer row/column on each iteration since that will have already
1502        // been zeroed. However, we then immediately discard all but the first column of that
1503        // minor, so we skip the minor step and compute directly the first column of the minor
1504        // we would have taken.
1505        let submatrix_first_column = r
1506            .select([(shape[1].0, c)])
1507            .iter()
1508            .skip(c)
1509            .collect::<Vec<_>>();
1510        // compute the (M-column)x(M-column) householder matrix
1511        let h = householder_matrix_tensor::<T>(submatrix_first_column, names);
1512        // pad the h into the bottom right of an identity matrix so it is MxM
1513        // like so:
1514        // 1 0 0
1515        // 0 H H
1516        // 0 H H
1517        let h = {
1518            let h_indexing = h.index();
1519            let mut identity = Tensor::diagonal([(shape[0].0, rows), (shape[1].0, rows)], T::one());
1520            // the column we're on is the same as how many steps we inset the
1521            // householder matrix into the identity
1522            let inset = c;
1523            for ([i, j], x) in identity.iter_reference_mut().with_index() {
1524                if i >= inset && j >= inset {
1525                    *x = h_indexing.get([i - inset, j - inset]);
1526                }
1527            }
1528            identity
1529        };
1530        // R = H_n * ... H_3 * H_2 * H_1 * A
1531        r = &h * r;
1532        // Q = H_1 * H_2 * H_3 .. H_n
1533        match q {
1534            None => q = Some(h),
1535            Some(h_previous) => q = Some(h_previous * h),
1536        }
1537    }
1538    Some(QRDecompositionTensor {
1539        // This should always be Some because the input matrix has to be at least 1x1
1540        q: q.unwrap(),
1541        r,
1542    })
1543}