easy_ml/linear_algebra.rs
/*!
 * Linear algebra algorithms on numbers and matrices
 *
 * Note that many of these functions are also exposed as corresponding methods on the Matrix
 * and Tensor types, but in-depth documentation is only presented here.
 *
 * It is recommended to favor the corresponding methods on the Matrix and Tensor types, as the
 * Rust compiler can get confused with the generics on these functions if you use
 * them without turbofish syntax.
 *
 * Nearly all of these functions are generic over [Numeric](super::numeric) types.
 * Unfortunately, when using these functions the compiler may get confused about what
 * type `T` should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::inverse::<f32>(&matrix)`
 *
 * You might be working with a generic type `T`, in which case specify it as:
 * `linear_algebra::inverse::<T>(&matrix)`
 *
 * ## Generics
 *
 * For the tensor variants of these functions, the generics allow very flexible input types.
 *
 * A function like
 * ```ignore
 * pub fn inverse_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>> where
 *     T: Numeric,
 *     for<'a> &'a T: NumericRef<T>,
 *     I: Into<TensorView<T, S, 2>>,
 *     S: TensorRef<T, 2>,
 * ```
 * means it takes any type that can be converted to a TensorView, which includes Tensor, &Tensor,
 * &mut Tensor, as well as references to a TensorView.
 */

use crate::matrices::{Column, Matrix, Row};
use crate::numeric::extra::{Real, RealRef, Sqrt};
use crate::numeric::{Numeric, NumericRef};
use crate::tensors::views::{TensorRef, TensorView};
use crate::tensors::{Dimension, Tensor};

/**
 * Computes the inverse of a matrix provided that it exists. To have an inverse
 * a matrix must be square (same number of rows and columns) and it must also
 * have a non zero determinant.
 *
 * The inverse of a matrix `A` is the matrix `A^-1` which when multiplied by `A`
 * in either order yields the identity matrix `I`.
 *
 * `A(A^-1) == (A^-1)A == I`.
 *
 * The inverse is like the reciprocal of a number, except for matrices instead of scalars.
 * With scalars, there is no inverse for `0` because `1 / 0` is not defined. Similarly,
 * to compute the inverse of a matrix we divide by the determinant, so matrices
 * with a determinant of 0 have no inverse, even if they are square.
 *
 * This algorithm performs the analytic solution described by
 * [wikipedia](https://en.wikipedia.org/wiki/Invertible_matrix#Analytic_solution)
 * and should compute the inverse for any size of square matrix if it exists, but
 * is inefficient for large matrices.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::inverse::<f32>(&matrix)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the matrix type like so:
 * `matrix.inverse()`
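 *
 * # Example
 *
 * A minimal sketch inverting a 2 x 2 matrix, with values chosen so the arithmetic is exact:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let matrix = Matrix::from(vec![
 *     vec![4.0, 7.0],
 *     vec![1.0, 2.0]
 * ]);
 * let inverse = linear_algebra::inverse::<f64>(&matrix).unwrap();
 * // the determinant is (4 * 2) - (7 * 1) = 1, so the inverse is [2, -7; -1, 4]
 * assert_eq!(inverse.get(0, 0), 2.0);
 * assert_eq!(inverse.get(0, 1), -7.0);
 * assert_eq!(inverse.get(1, 0), -1.0);
 * assert_eq!(inverse.get(1, 1), 4.0);
 * ```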
 */
pub fn inverse<T: Numeric>(matrix: &Matrix<T>) -> Option<Matrix<T>>
where
    for<'a> &'a T: NumericRef<T>,
{
    if matrix.rows() != matrix.columns() {
        return None;
    }
    // inverse of a 1 x 1 matrix is a special case
    if matrix.rows() == 1 {
        // determinant of a 1 x 1 matrix is the single element
        let element = matrix.scalar();
        if element == T::zero() {
            return None;
        }
        return Some(Matrix::from_scalar(T::one() / element));
    }

    // compute the general case for a N x N matrix where N >= 2
    match determinant::<T>(matrix) {
        Some(det) => {
            if det == T::zero() {
                return None;
            }
            let determinant_reciprocal = T::one() / det;
            let mut cofactor_matrix = Matrix::empty(T::zero(), matrix.size());
            for i in 0..matrix.rows() {
                for j in 0..matrix.columns() {
                    // this should always return Some due to the earlier checks
                    let ij_minor = minor::<T>(matrix, i, j)?;
                    // i and j may each be up to the maximum value for usize but
                    // we only need to know if they are even or odd as
                    // -1 ^ (i + j) == -1 ^ ((i % 2) + (j % 2))
                    // by taking modulo of both before adding we ensure there
                    // is no overflow
                    let sign = i8::pow(-1, (i.rem_euclid(2) + j.rem_euclid(2)) as u32);
                    // convert sign into type T
                    let sign = if sign == 1 {
                        T::one()
                    } else {
                        T::zero() - T::one()
                    };
                    // each element of the cofactor matrix is -1^(i+j) * M_ij
                    // for M_ij equal to the ij minor of the matrix
                    cofactor_matrix.set(i, j, sign * ij_minor);
                }
            }
            // transposing the cofactor matrix yields the adjugate matrix
            cofactor_matrix.transpose_mut();
            // finally to compute the inverse we need to multiply each element by 1 / |A|
            cofactor_matrix.map_mut(|element| element * determinant_reciprocal.clone());
            Some(cofactor_matrix)
        }
        None => None,
    }
}

/**
 * Computes the inverse of a matrix provided that it exists. To have an inverse
 * a matrix must be square (same number of rows and columns) and it must also
 * have a non zero determinant.
 *
 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
 * shape, you should reorder the Tensor before calling this function to get the appropriate
 * matrix.
 *
 * The inverse of a matrix `A` is the matrix `A^-1` which when multiplied by `A`
 * in either order yields the identity matrix `I`.
 *
 * `A(A^-1) == (A^-1)A == I`.
 *
 * The inverse is like the reciprocal of a number, except for matrices instead of scalars.
 * With scalars, there is no inverse for `0` because `1 / 0` is not defined. Similarly,
 * to compute the inverse of a matrix we divide by the determinant, so matrices
 * with a determinant of 0 have no inverse, even if they are square.
 *
 * This algorithm performs the analytic solution described by
 * [wikipedia](https://en.wikipedia.org/wiki/Invertible_matrix#Analytic_solution)
 * and should compute the inverse for any size of square matrix if it exists, but
 * is inefficient for large matrices.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::inverse_tensor::<f32, _, _>(&tensor)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the tensor type like so:
 * `tensor.inverse()`
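 *
 * # Example
 *
 * A minimal sketch, mirroring the matrix example with a Tensor (values chosen so the
 * arithmetic is exact):
 * ```
 * use easy_ml::tensors::Tensor;
 * use easy_ml::linear_algebra;
 *
 * let tensor = Tensor::from([("rows", 2), ("columns", 2)], vec![
 *     4.0, 7.0,
 *     1.0, 2.0
 * ]);
 * let inverse = linear_algebra::inverse_tensor::<f64, _, _>(&tensor).unwrap();
 * let indexing = inverse.index();
 * assert_eq!(indexing.get([0, 0]), 2.0);
 * assert_eq!(indexing.get([0, 1]), -7.0);
 * assert_eq!(indexing.get([1, 0]), -1.0);
 * assert_eq!(indexing.get([1, 1]), 4.0);
 * ```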
 */
pub fn inverse_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    inverse_less_generic::<T, S>(tensor.into())
}

fn inverse_less_generic<T, S>(tensor: TensorView<T, S, 2>) -> Option<Tensor<T, 2>>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    let shape = tensor.shape();
    if !crate::tensors::dimensions::is_square(&shape) {
        return None;
    }

    // inverse of a 1 x 1 matrix is a special case
    if shape[0].1 == 1 {
        // determinant of a 1 x 1 matrix is the single element
        let element = tensor
            .iter()
            .next()
            .expect("1x1 tensor must have a single element");
        if element == T::zero() {
            return None;
        }
        return Some(Tensor::from(shape, vec![T::one() / element]));
    }

    // compute the general case for a N x N matrix where N >= 2
    match determinant_less_generic::<T, _>(&tensor) {
        Some(det) => {
            if det == T::zero() {
                return None;
            }
            let determinant_reciprocal = T::one() / det;
            let mut cofactor_matrix = Tensor::empty(shape, T::zero());
            for ([i, j], x) in cofactor_matrix.iter_reference_mut().with_index() {
                // this should always return Some due to the earlier checks
                let ij_minor = minor_tensor::<T, _>(&tensor, i, j)?;
                // i and j may each be up to the maximum value for usize but
                // we only need to know if they are even or odd as
                // -1 ^ (i + j) == -1 ^ ((i % 2) + (j % 2))
                // by taking modulo of both before adding we ensure there
                // is no overflow
                let sign = i8::pow(-1, (i.rem_euclid(2) + j.rem_euclid(2)) as u32);
                // convert sign into type T
                let sign = if sign == 1 {
                    T::one()
                } else {
                    T::zero() - T::one()
                };
                // each element of the cofactor matrix is -1^(i+j) * M_ij
                // for M_ij equal to the ij minor of the matrix
                *x = sign * ij_minor;
            }
            // transposing the cofactor matrix yields the adjugate matrix
            cofactor_matrix.transpose_mut([shape[1].0, shape[0].0]);
            // finally to compute the inverse we need to multiply each element by 1 / |A|
            cofactor_matrix.map_mut(|element| element * determinant_reciprocal.clone());
            Some(cofactor_matrix)
        }
        None => None,
    }
}

// TODO: expose these minor methods and test them directly
// https://www.teachoo.com/9780/1204/Minor-and-Cofactor-of-a-determinant/category/Finding-Minors-and-cofactors/

/*
 * Computes the (i,j) minor of a matrix by copying it. This is the
 * determinant of the matrix after deleting the ith row and the jth column.
 *
 * Minors can only be taken on matrices which have a determinant and rows and
 * columns to remove. Hence for non square matrices or 1 x 1 matrices this returns
 * None.
 */
fn minor<T: Numeric>(matrix: &Matrix<T>, i: Row, j: Column) -> Option<T>
where
    for<'a> &'a T: NumericRef<T>,
{
    minor_mut::<T>(&mut matrix.clone(), i, j)
}

/**
 * Computes the (i,j) minor of a matrix by modifying it in place. This is
 * the determinant of the matrix after deleting the ith row and the jth column.
 *
 * Minors can only be taken on matrices which have a determinant and rows and
 * columns to remove. Hence for non square matrices or 1 x 1 matrices this returns
 * None and does not modify the matrix.
 */
fn minor_mut<T: Numeric>(matrix: &mut Matrix<T>, i: Row, j: Column) -> Option<T>
where
    for<'a> &'a T: NumericRef<T>,
{
    if matrix.rows() == 1 || matrix.columns() == 1 {
        // nothing to delete
        return None;
    }
    if matrix.rows() != matrix.columns() {
        // no determinant
        return None;
    }
    matrix.remove_row(i);
    matrix.remove_column(j);
    determinant::<T>(matrix)
}

fn minor_tensor<T, S>(tensor: &TensorView<T, S, 2>, i: usize, j: usize) -> Option<T>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    use crate::tensors::views::{IndexRange, TensorMask};
    let shape = tensor.shape();
    if shape[0].1 == 1 || shape[1].1 == 1 {
        // nothing to delete
        return None;
    }
    if !crate::tensors::dimensions::is_square(&shape) {
        // no determinant
        return None;
    }
    let minored = TensorView::from(
        TensorMask::from_all(
            tensor.source_ref(),
            [Some(IndexRange::new(i, 1)), Some(IndexRange::new(j, 1))],
        )
        .expect("Having just checked tensor is at least 2x2 we should be able to take a mask"),
    );
    determinant_less_generic::<T, _>(&minored)
}

/**
 * Computes the determinant of a square matrix. For a 2 x 2 matrix this is given by
 * `ad - bc` for:
 * ```ignore
 * [
 *   a, b
 *   c, d
 * ]
 * ```
 *
 * This function will return the determinant only if it exists. Non square matrices
 * do not have a determinant. A determinant is a scalar value computed from the
 * elements of a square matrix and often corresponds to matrices with special properties.
 *
 * Note that the determinant of a 1 x 1 matrix is just the element in the matrix.
 *
 * This function computes the determinant using the same type as that of the Matrix,
 * hence if the input type is unsigned (such as Wrapping<u8>) the value computed
 * is likely to not make any sense because a determinant may be negative.
 *
 * [https://en.wikipedia.org/wiki/Determinant](https://en.wikipedia.org/wiki/Determinant)
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::determinant::<f32>(&matrix)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the matrix type like so:
 * [`matrix.determinant()`](Matrix::determinant)
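 *
 * # Example
 *
 * A minimal sketch computing a 2 x 2 determinant:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let matrix = Matrix::from(vec![
 *     vec![4.0, 7.0],
 *     vec![2.0, 6.0]
 * ]);
 * // ad - bc = (4 * 6) - (7 * 2) = 10
 * assert_eq!(linear_algebra::determinant::<f64>(&matrix), Some(10.0));
 * ```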
 */
pub fn determinant<T: Numeric>(matrix: &Matrix<T>) -> Option<T>
where
    for<'a> &'a T: NumericRef<T>,
{
    if matrix.rows() != matrix.columns() {
        return None;
    }
    let length = matrix.rows();

    match length {
        0 => return None,
        1 => return Some(matrix.scalar()),
        _ => (),
    };

    determinant_less_generic::<T, _>(&TensorView::from(
        crate::interop::TensorRefMatrix::from(matrix).ok()?,
    ))
}

/**
 * Computes the determinant of a square matrix. For a 2 x 2 matrix this is given by
 * `ad - bc` for:
 * ```ignore
 * [
 *   a, b
 *   c, d
 * ]
 * ```
 *
 * This function will return the determinant only if it exists. Non square matrices
 * do not have a determinant. A determinant is a scalar value computed from the
 * elements of a square matrix and often corresponds to matrices with special properties.
 *
 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
 * shape, you should reorder the Tensor before calling this function to get the appropriate
 * matrix.
 *
 * Note that the determinant of a 1 x 1 matrix is just the element in the matrix.
 *
 * This function computes the determinant using the same type as that of the Tensor,
 * hence if the input type is unsigned (such as Wrapping<u8>) the value computed
 * is likely to not make any sense because a determinant may be negative.
 *
 * [https://en.wikipedia.org/wiki/Determinant](https://en.wikipedia.org/wiki/Determinant)
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::determinant_tensor::<f32, _, _>(&tensor)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the tensor type like so:
 * [`tensor.determinant()`](Tensor::determinant)
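 *
 * # Example
 *
 * A minimal sketch, mirroring the matrix example with a Tensor:
 * ```
 * use easy_ml::tensors::Tensor;
 * use easy_ml::linear_algebra;
 *
 * let tensor = Tensor::from([("rows", 2), ("columns", 2)], vec![
 *     4.0, 7.0,
 *     2.0, 6.0
 * ]);
 * assert_eq!(linear_algebra::determinant_tensor::<f64, _, _>(&tensor), Some(10.0));
 * ```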
 */
pub fn determinant_tensor<T, S, I>(tensor: I) -> Option<T>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    determinant_less_generic::<T, S>(&tensor.into())
}

fn determinant_less_generic<T, S>(tensor: &TensorView<T, S, 2>) -> Option<T>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    let shape = tensor.shape();
    if !crate::tensors::dimensions::is_square(&shape) {
        return None;
    }
    let length = shape[0].1;

    if length == 0 {
        return None;
    }

    let matrix = tensor.index();

    if length == 1 {
        return Some(matrix.get([0, 0]));
    }

    // compute the general case for the determinant of an N x N matrix with
    // N >= 2

    let mut sum = T::zero();

    // iterate through all permutations of the numbers in the range from 0 to N - 1
    // which we will use for indexing
    with_each_permutation(&mut (0..length).collect(), &mut |permutation, even_swap| {
        // Compute the signature for this permutation, such that we
        // have +1 for an even number and -1 for an odd number of swaps
        let signature = if even_swap {
            T::one()
        } else {
            T::zero() - T::one()
        };
        let mut product = T::one();
        for (n, i) in permutation.iter().enumerate() {
            // Get the element at the index corresponding to n and the n'th
            // element in the permutation list.
            let element = matrix.get_ref([n, *i]);
            product = product * element;
        }
        // We copy the sum to prevent a move that would stop us from returning it.
        // This still massively reduces the amount of copies compared to using
        // generate_permutations, which would instead require copying the
        // permutation list N! times (though it would avoid copying the sum).
        sum = sum.clone() + (signature * product);
    });

    Some(sum)
}

/*
 * Computes the factorial of a number.
 * eg for an input of 5 computes 1 * 2 * 3 * 4 * 5
 * which is equal to 120
 */
#[allow(dead_code)] // used in testing
fn factorial(n: usize) -> usize {
    (1..=n).product()
}

/**
 * Performs repeated swaps on the provided mutable reference to a list, swapping
 * exactly 1 pair each time before calling the consumer, as defined by
 * [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm)
 */
fn heaps_permutations<T: Clone, F>(k: usize, list: &mut Vec<T>, consumer: &mut F)
where
    F: FnMut(&mut Vec<T>),
{
    if k == 1 {
        consumer(list);
        return;
    }

    for i in 0..k {
        heaps_permutations(k - 1, list, consumer);
        // avoid redundant swaps
        if i < k - 1 {
            // Swap based on the even/oddness of k
            if k % 2 == 0 {
                // if k is even swap the final element and the index
                list.swap(i, k - 1);
            } else {
                // if k is odd swap the final element and the first
                list.swap(0, k - 1);
            }
        }
    }
}

/**
 * Generates a list of all possible permutations of a list, with each
 * sublist one swap different from the last and correspondingly alternating
 * in even and odd swaps required to obtain the reordering.
 */
#[allow(dead_code)] // used in testing
fn generate_permutations<T: Clone>(list: &mut Vec<T>) -> Vec<(Vec<T>, bool)> {
    let mut permutations = Vec::with_capacity(factorial(list.len()));
    let mut even_swaps = true;
    heaps_permutations(list.len(), list, &mut |permuted| {
        permutations.push((permuted.clone(), even_swaps));
        even_swaps = !even_swaps;
    });
    permutations
}

/*
 * In place version of generate_permutations which calls the consumer on
 * each permuted list without performing any copies (ie each permuted list
 * is the same list before and after permutation).
 */
fn with_each_permutation<T: Clone, F>(list: &mut Vec<T>, consumer: &mut F)
where
    F: FnMut(&mut Vec<T>, bool),
{
    let mut even_swaps = true;
    heaps_permutations(list.len(), list, &mut |permuted| {
        consumer(permuted, even_swaps);
        even_swaps = !even_swaps;
    });
}

#[cfg(test)]
#[test]
fn test_permutations() {
    // Exhaustively test permutation even/oddness for an input
    // of length 3
    let mut list = vec![1, 2, 3];
    let permutations = generate_permutations(&mut list);
    assert!(permutations.contains(&(vec![1, 2, 3], true)));
    assert!(permutations.contains(&(vec![3, 2, 1], false)));
    assert!(permutations.contains(&(vec![2, 3, 1], true)));
    assert!(permutations.contains(&(vec![1, 3, 2], false)));
    assert!(permutations.contains(&(vec![2, 1, 3], false)));
    assert!(permutations.contains(&(vec![3, 1, 2], true)));
    assert_eq!(permutations.len(), 6);

    // Test a larger input non exhaustively to make sure it
    // generalises.
    let mut list = vec![1, 2, 3, 4, 5];
    let permuted = generate_permutations(&mut list);
    assert!(permuted.contains(&(vec![1, 2, 3, 4, 5], true)));
    assert!(permuted.contains(&(vec![1, 2, 3, 5, 4], false)));
    assert!(permuted.contains(&(vec![1, 2, 5, 3, 4], true)));

    // Test a length 2 input as well
    let mut list = vec![0, 1];
    let permuted = generate_permutations(&mut list);
    assert!(permuted.contains(&(vec![0, 1], true)));
    assert!(permuted.contains(&(vec![1, 0], false)));
    assert_eq!(permuted.len(), 2);
}

/**
 * Computes the covariance matrix for an NxM feature matrix, in which
 * each N'th row has M features to find the covariance and variance of.
 *
 * The covariance matrix is a matrix of how each feature varies with itself
 * (along the diagonal) and all the other features (symmetrically above and below
 * the diagonal).
 *
 * Each element in the covariance matrix at (i, j) will be the variance of the
 * ith and jth features from the feature matrix, defined as the zero meaned
 * dot product of the two feature vectors divided by the number of samples.
 *
 * If all the features in the input have a variance of one then the covariance matrix
 * returned by this function will be equivalent to the correlation matrix of the input.
 *
 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 *
 * # Panics
 *
 * If the numeric type is unable to represent the number of samples
 * for each feature (ie if `T: i8` and you have 1000 samples) then this function
 * will panic.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::covariance_column_features::<f32>(&matrix)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the matrix type like so:
 * `matrix.covariance_column_features()`
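 *
 * # Example
 *
 * A minimal sketch with 4 samples of 2 features, chosen so the arithmetic is exact:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let matrix = Matrix::from(vec![
 *     vec![1.0, 2.0],
 *     vec![1.0, 4.0],
 *     vec![3.0, 2.0],
 *     vec![3.0, 4.0]
 * ]);
 * let covariance_matrix = linear_algebra::covariance_column_features::<f64>(&matrix);
 * // each feature has a variance of 1 along the diagonal, and the two features
 * // are uncorrelated so the off diagonal entries are 0
 * assert_eq!(covariance_matrix.get(0, 0), 1.0);
 * assert_eq!(covariance_matrix.get(1, 1), 1.0);
 * assert_eq!(covariance_matrix.get(0, 1), 0.0);
 * ```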
 */
pub fn covariance_column_features<T: Numeric>(matrix: &Matrix<T>) -> Matrix<T>
where
    for<'a> &'a T: NumericRef<T>,
{
    let features = matrix.columns();
    let samples = T::from_usize(matrix.rows())
        .expect("The maximum value of the matrix type T cannot represent this many samples");
    let mut covariance_matrix = Matrix::empty(T::zero(), (features, features));
    covariance_matrix.map_mut_with_index(|_, i, j| {
        // set each element of the covariance matrix to the variance
        // of features i and j
        let feature_i_mean: T = matrix.column_iter(i).sum::<T>() / &samples;
        let feature_j_mean: T = matrix.column_iter(j).sum::<T>() / &samples;
        matrix
            .column_reference_iter(i)
            .map(|x| x - &feature_i_mean)
            .zip(matrix.column_reference_iter(j).map(|y| y - &feature_j_mean))
            .map(|(x, y)| x * y)
            .sum::<T>()
            / &samples
    });
    covariance_matrix
}

/**
 * Computes the covariance matrix for an NxM feature matrix, in which
 * each M'th column has N features to find the covariance and variance of.
 *
 * The covariance matrix is a matrix of how each feature varies with itself
 * (along the diagonal) and all the other features (symmetrically above and below
 * the diagonal).
 *
 * Each element in the covariance matrix at (i, j) will be the variance of the
 * ith and jth features from the feature matrix, defined as the zero meaned
 * dot product of the two feature vectors divided by the number of samples.
 *
 * If all the features in the input have a variance of one then the covariance matrix
 * returned by this function will be equivalent to the correlation matrix of the input.
 *
 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 *
 * # Panics
 *
 * If the numeric type is unable to represent the number of samples
 * for each feature (ie if `T: i8` and you have 1000 samples) then this function
 * will panic.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::covariance_row_features::<f32>(&matrix)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the matrix type like so:
 * `matrix.covariance_row_features()`
 */
pub fn covariance_row_features<T: Numeric>(matrix: &Matrix<T>) -> Matrix<T>
where
    for<'a> &'a T: NumericRef<T>,
{
    let features = matrix.rows();
    let samples = T::from_usize(matrix.columns())
        .expect("The maximum value of the matrix type T cannot represent this many samples");
    let mut covariance_matrix = Matrix::empty(T::zero(), (features, features));
    covariance_matrix.map_mut_with_index(|_, i, j| {
        // set each element of the covariance matrix to the variance
        // of features i and j
        let feature_i_mean: T = matrix.row_iter(i).sum::<T>() / &samples;
        let feature_j_mean: T = matrix.row_iter(j).sum::<T>() / &samples;
        matrix
            .row_reference_iter(i)
            .map(|x| x - &feature_i_mean)
            .zip(matrix.row_reference_iter(j).map(|y| y - &feature_j_mean))
            .map(|(x, y)| x * y)
            .sum::<T>()
            / &samples
    });
    covariance_matrix
}

/**
 * Computes the covariance matrix for a 2 dimensional Tensor feature matrix.
 *
 * The `feature_dimension` specifies which dimension holds the features. The other dimension
 * is assumed to hold the samples. For a Tensor with a `feature_dimension` of length N, and
 * the other dimension of length M, returns an NxN covariance matrix with a shape of
 * `[("i", N), ("j", N)]`.
 *
 * Each element in the covariance matrix at (i, j) will be the variance of the ith and jth
 * features from the feature matrix, defined as the zero meaned dot product of the two
 * feature vectors divided by the number of samples (M).
 *
 * If all the features in the input have a variance of one then the covariance matrix
 * returned by this function will be equivalent to the correlation matrix of the input.
 *
 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 *
 * # Panics
 *
 * - If the numeric type is unable to represent the number of samples for each feature
 *   (ie if `T: i8` and you have 1000 samples)
 * - If the provided feature_dimension is not a dimension in the tensor
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::covariance::<f32, _, _>(&tensor)`
 *
 * Alternatively, the compiler doesn't seem to run into this problem if you
 * use the equivalent methods on the tensor type like so:
 * `tensor.covariance("features")`
 *
 * # Example
 *
 * ```
 * use easy_ml::tensors::Tensor;
 * let matrix = Tensor::from([("samples", 5), ("features", 3)], vec![
 *     // X     Y    Z
 *     1.0,  0.0, 0.5,
 *     1.2, -1.0, 0.4,
 *     1.8, -1.2, 0.7,
 *     0.9,  0.1, 0.3,
 *     0.7,  0.5, 0.6
 * ]);
 * let covariance_matrix = matrix.covariance("features");
 * let (x, y, z) = (0, 1, 2);
 * let x_y_z = covariance_matrix.index();
 * // the variance of each feature with itself is positive
 * assert!(x_y_z.get([x, x]) > 0.0);
 * assert!(x_y_z.get([y, y]) > 0.0);
 * assert!(x_y_z.get([z, z]) > 0.0);
 * // first feature X and second feature Y have negative covariance (as X goes up Y goes down)
 * assert!(x_y_z.get([x, y]) < 0.0);
 * println!("{}", covariance_matrix);
 * // D = 2
 * // ("i", 3), ("j", 3)
 * // [ 0.142, -0.226, 0.026
 * //  -0.226, 0.438, -0.022
 * //   0.026, -0.022, 0.020 ]
 * ```
 */
#[track_caller]
pub fn covariance<T, S, I>(tensor: I, feature_dimension: Dimension) -> Tensor<T, 2>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    covariance_less_generic::<T, S>(tensor.into(), feature_dimension)
}

#[track_caller]
fn covariance_less_generic<T, S>(
    tensor: TensorView<T, S, 2>,
    feature_dimension: Dimension,
) -> Tensor<T, 2>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    let shape = tensor.shape();
    let features_index = {
        if shape[0].0 == feature_dimension {
            0
        } else if shape[1].0 == feature_dimension {
            1
        } else {
            panic!(
                "Feature dimension {:?} is not present in the input tensor's shape: {:?}",
                feature_dimension, shape
            );
        }
    };
    let (feature_dimension, features) = shape[features_index];
    let (_sample_dimension, samples) = shape[1 - features_index];
    let samples = T::from_usize(samples)
        .expect("The maximum value of the matrix type T cannot represent this many samples");
    let mut covariance_matrix = Tensor::empty([("i", features), ("j", features)], T::zero());
    covariance_matrix.map_mut_with_index(|[i, j], _| {
        // set each element of the covariance matrix to the variance of features i and j
        #[rustfmt::skip]
        let feature_i_mean: T = tensor
            .select([(feature_dimension, i)])
            .iter()
            .sum::<T>() / &samples;
        #[rustfmt::skip]
        let feature_j_mean: T = tensor
            .select([(feature_dimension, j)])
            .iter()
            .sum::<T>() / &samples;
        tensor
            .select([(feature_dimension, i)])
            .iter_reference()
            .map(|x| x - &feature_i_mean)
            .zip(
                tensor
                    .select([(feature_dimension, j)])
                    .iter_reference()
                    .map(|y| y - &feature_j_mean),
            )
            .map(|(x, y)| x * y)
            .sum::<T>()
            / &samples
    });
    covariance_matrix
}

/**
 * Computes the mean of the values in an iterator, consuming the iterator.
 *
 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 *
 * # Panics
 *
 * If the iterator is empty. This function will also fail if the length of the iterator
 * or sum of all the values in the iterator exceeds the maximum number the type can
 * represent.
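 *
 * # Example
 *
 * A minimal sketch:
 * ```
 * use easy_ml::linear_algebra;
 *
 * let mean: f64 = linear_algebra::mean(vec![1.0, 2.0, 3.0, 4.0].into_iter());
 * assert_eq!(mean, 2.5);
 * ```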
 */
pub fn mean<I, T: Numeric>(mut data: I) -> T
where
    I: Iterator<Item = T>,
{
    let mut next = data.next();
    assert!(next.is_some(), "Provided iterator must not be empty");
    let mut count = T::zero();
    let mut sum = T::zero();
    while next.is_some() {
        count = count + T::one();
        sum = sum + next.unwrap();
        next = data.next();
    }
    sum / count
}

/**
 * Computes the variance of the values in an iterator, consuming the iterator.
 *
 * Variance is defined as the expected value of the squares of the zero mean data.
 * It captures how much data varies from its mean, ie the spread of the data.
 *
 * This function does not perform [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction)
 *
 * Variance may also be computed as the mean of each squared datapoint minus the
 * square of the mean of the data. Although this method would allow for a streaming
 * implementation, the [wikipedia page](https://en.wikipedia.org/wiki/Variance#Definition)
 * cautions: "This equation should not be used for computations using floating point
 * arithmetic because it suffers from catastrophic cancellation if the two components
 * of the equation are similar in magnitude".
 *
 * # Panics
 *
 * If the iterator is empty. This function will also fail if the length of the iterator
 * or sum of all the values in the iterator exceeds the maximum number the type can
 * represent.
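 *
 * # Example
 *
 * A minimal sketch, with values chosen so the arithmetic is exact:
 * ```
 * use easy_ml::linear_algebra;
 *
 * // the mean is 5, the squared deviations are 9, 1, 1 and 9, so the variance is 20 / 4 = 5
 * let variance: f64 = linear_algebra::variance(vec![2.0, 4.0, 6.0, 8.0].into_iter());
 * assert_eq!(variance, 5.0);
 * ```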
 */
pub fn variance<I, T: Numeric>(data: I) -> T
where
    I: Iterator<Item = T>,
{
    let list = data.collect::<Vec<T>>();
    assert!(!list.is_empty(), "Provided iterator must not be empty");

    // copy the list as we need to keep it as well as getting the mean
    let m = mean(list.iter().cloned());
    // use into_iter here as we no longer need to keep the list
    mean(
        list.into_iter()
            .map(|x| (x.clone() - m.clone()) * (x - m.clone())),
    )
}

/**
 * Computes the softmax of the values in an iterator, consuming the iterator.
 *
 * softmax(z)\[i\] = e<sup>z<sub>i</sub></sup> / the sum of e<sup>z<sub>j</sub></sup> for all j
 * where z is a list of elements
 *
 * Softmax normalises an input of numbers into a probability distribution, such
 * that they will sum to 1. This is often used on the output layer of a neural
 * network for classification.
 *
 * The implementation shifts the inputs by the maximum value in the iterator,
 * to make numerical overflow less of a problem. As per the definition of softmax,
 * softmax(z) = softmax(z-max(z)).
 *
 * [Further information](https://en.wikipedia.org/wiki/Softmax_function)
 *
 * # Panics
 *
 * If the iterator contains NaN values, or any value for which PartialOrd fails.
 *
 * This function will also fail if the length of the iterator or sum of all the values
 * in the iterator exceeds the maximum or minimum number the type can represent.
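 *
 * # Example
 *
 * A minimal sketch; equal inputs yield equal probabilities that sum to 1:
 * ```
 * use easy_ml::linear_algebra;
 *
 * let probabilities: Vec<f64> = linear_algebra::softmax(vec![1.0, 1.0].into_iter());
 * assert_eq!(probabilities, vec![0.5, 0.5]);
 * ```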
 */
pub fn softmax<I, T: Real>(data: I) -> Vec<T>
where
    I: Iterator<Item = T>,
{
    let list = data.collect::<Vec<T>>();
    if list.is_empty() {
        return Vec::with_capacity(0);
    }
    let max = list
        .iter()
        .max_by(|a, b| a.partial_cmp(b).expect("NaN should not be in list"))
        .unwrap();

    let denominator: T = list.iter().cloned().map(|x| (x - max).exp()).sum();
    list.iter()
        .cloned()
        .map(|x| (x - max).exp() / denominator.clone())
        .collect()
}

/**
 * Computes the F-1 score of the Precision and Recall
 *
 * 2 * (precision * recall) / (precision + recall)
 *
 * # [F-1 score](https://en.wikipedia.org/wiki/F1_score)
 *
 * This is a harmonic mean of the two, which penalises the score
 * more heavily than an arithmetic mean would if either the precision
 * or recall are poor.
 *
 * The F-1 score is a helpful metric for assessing classifiers, as
 * it takes into account that classes may be heavily biased, which
 * Accuracy does not. For example, it may be quite easy to create a
 * 95% accurate test for a medical condition, which intuitively seems
 * very good, but if 99.9% of patients are expected to not have the
 * condition then accuracy is a poor way to measure performance because
 * it does not consider that the cost of false negatives is very high.
 *
 * Note that Precision and Recall both depend on there being a positive
 * and negative class for a classification task; in some contexts this may
 * be an arbitrary choice.
 *
 * # [Precision](https://en.wikipedia.org/wiki/Precision_and_recall)
 *
 * In classification, precision is true positives / positive predictions.
 * It measures correct identifications of the positive class compared
 * to all predictions of the positive class. You can trivially get
 * 100% precision by never predicting the positive class, as this can
 * never result in a false positive.
 *
 * Note that the meaning of precision in classification or document
 * retrieval is not the same as its meaning in [measurements](https://en.wikipedia.org/wiki/Accuracy_and_precision).
 *
 * # [Recall](https://en.wikipedia.org/wiki/Precision_and_recall)
 *
 * In classification, recall is true positives / actual positives.
 * It measures how many of the positive cases are identified. You
 * can trivially get 100% recall by always predicting the positive class,
 * as this can never result in a false negative.
 *
 * # [F scores](https://en.wikipedia.org/wiki/F1_score)
 *
 * The F-1 score is an evenly weighted combination of Precision and
 * Recall. For domains where the cost of false positives and false
 * negatives are not equal, you should use a biased F score that weights
 * precision or recall more strongly than the other.
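 *
 * # Example
 *
 * A minimal sketch, with values chosen so the arithmetic is exact:
 * ```
 * use easy_ml::linear_algebra;
 *
 * // the harmonic mean of 0.5 and 0.5 is 0.5
 * assert_eq!(linear_algebra::f1_score(0.5, 0.5), 0.5);
 * ```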
 */
pub fn f1_score<T: Numeric>(precision: T, recall: T) -> T {
    (T::one() + T::one()) * ((precision.clone() * recall.clone()) / (precision + recall))
}

/**
 * Computes the cholesky decomposition of a matrix. This yields a matrix `L`
 * such that for the provided matrix `A`, `L * L^T = A`. `L` will always be
 * lower triangular, ie all entries above the diagonal will be 0. Hence cholesky
 * decomposition can be interpreted as a generalised square root function.
 *
 * Cholesky decomposition is defined for
 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix),
 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix) matrices.
 *
 * This function does not check that the provided matrix is symmetric. However, given a symmetric
 * input, if the input is not positive definite `None` is returned. Attempting a cholesky
 * decomposition is also an efficient way to check if such a matrix is positive definite.
 * In the future additional checks that the input is valid could be added.
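 *
 * # Example
 *
 * A minimal sketch, with values chosen so the arithmetic is exact:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let a = Matrix::from(vec![
 *     vec![4.0, 2.0],
 *     vec![2.0, 2.0]
 * ]);
 * let l = linear_algebra::cholesky_decomposition::<f64>(&a).unwrap();
 * // L * L^T = A for the lower triangular L = [2, 0; 1, 1]
 * assert_eq!(l.get(0, 0), 2.0);
 * assert_eq!(l.get(0, 1), 0.0);
 * assert_eq!(l.get(1, 0), 1.0);
 * assert_eq!(l.get(1, 1), 1.0);
 * ```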
 */
pub fn cholesky_decomposition<T: Numeric + Sqrt<Output = T>>(
    matrix: &Matrix<T>,
) -> Option<Matrix<T>>
where
    for<'a> &'a T: NumericRef<T>,
{
    let lower_triangular = cholesky_decomposition_less_generic::<T, _>(&TensorView::from(
        crate::interop::TensorRefMatrix::from(matrix).ok()?,
    ))?;
    Some(lower_triangular.into_matrix())
}

/**
 * Computes the cholesky decomposition of a Tensor matrix. This yields a matrix `L`
 * such that for the provided matrix `A`, `L * L^T = A`. `L` will always be
 * lower triangular, ie all entries above the diagonal will be 0. Hence cholesky
 * decomposition can be interpreted as a generalised square root function.
 *
 * Cholesky decomposition is defined for
 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix),
 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix) matrices.
 *
 * This function does not check that the provided matrix is symmetric. However, given a symmetric
 * input, if the input is not positive definite `None` is returned. Attempting a cholesky
 * decomposition is also an efficient way to check if such a matrix is positive definite.
 * In the future additional checks that the input is valid could be added.
 *
 * The output matrix will have the same shape as the input.
 */
pub fn cholesky_decomposition_tensor<T, S, I>(tensor: I) -> Option<Tensor<T, 2>>
where
    T: Numeric + Sqrt<Output = T>,
    for<'a> &'a T: NumericRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    cholesky_decomposition_less_generic::<T, S>(&tensor.into())
}

#[rustfmt::skip]
fn cholesky_decomposition_less_generic<T, S>(tensor: &TensorView<T, S, 2>) -> Option<Tensor<T, 2>>
where
    T: Numeric + Sqrt<Output = T>,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    let shape = tensor.shape();
    if !crate::tensors::dimensions::is_square(&shape) {
        return None;
    }
    // The computation steps are outlined nicely at https://rosettacode.org/wiki/Cholesky_decomposition
    let mut lower_triangular = Tensor::empty(shape, T::zero());
    let mut lower_triangular_indexing = lower_triangular.index_mut();
    let tensor_indexing = tensor.index();
    let n = shape[0].1;

    for i in 0..n {
        // For each column j we need to compute all i, j entries
        // before incrementing j further as the diagonals depend
        // on the elements below the diagonal of the previous columns,
        // and the elements below the diagonal depend on the diagonal
        // of their column and elements below the diagonal up to that
        // column.

        for j in 0..=i {
            // For the i = j case we compute the sum of squares, otherwise we're
            // computing a sum of L_ik * L_jk using the current column and prior columns
            let sum = {
                let mut sum = T::zero();
                for k in 0..j {
                    sum = &sum
                        + (lower_triangular_indexing.get_ref([i, k])
                            * lower_triangular_indexing.get_ref([j, k]));
                }
                sum
            };
            // Calculate L_ij as we step through the lower diagonal
            *lower_triangular_indexing.get_ref_mut([i, j]) = if i == j {
                let entry_squared = tensor_indexing.get_ref([i, j]) - sum;
                if entry_squared <= T::zero() {
                    // input wasn't positive definite! avoid sqrt of a negative number.
                    // We can take sqrt(0) but that will leave a 0 on the diagonal which
                    // will then cause division by zero for the j < i case later.
                    return None;
                }
                entry_squared.sqrt()
            } else /* j < i */ {
                (tensor_indexing.get_ref([i, j]) - sum)
                    * (T::one() / lower_triangular_indexing.get_ref([j, j]))
            };
        }
    }
    Some(lower_triangular)
}

/**
 * The result of an `LDL^T` Decomposition of some matrix `A` such that `LDL^T = A`.
 */
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct LDLTDecomposition<T> {
    pub l: Matrix<T>,
    pub d: Matrix<T>,
}

impl<T: std::fmt::Display + Clone> std::fmt::Display for LDLTDecomposition<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "L:\n{}", &self.l)?;
        write!(f, "D:\n{}", &self.d)
    }
}

impl<T> LDLTDecomposition<T> {
    /**
     * Creates an `LDL^T` Decomposition struct from two matrices without checking that `LDL^T = A`
     * or that L and D have the intended properties.
     *
     * This is provided for assistance with unit testing.
     */
    pub fn from_unchecked(l: Matrix<T>, d: Matrix<T>) -> LDLTDecomposition<T> {
        LDLTDecomposition { l, d }
    }
}

/**
 * Computes the LDL^T decomposition of a matrix. This yields a matrix `L` and a matrix `D`
 * such that for the provided matrix `A`, `L * D * L^T = A`. `L` will always be
 * unit lower triangular, ie all entries above the diagonal will be 0, and all entries along
 * the diagonal will be 1. `D` will always contain zeros except along the diagonal. This
 * decomposition is closely related to the [cholesky decomposition](cholesky_decomposition)
 * with the notable difference that it avoids taking square roots.
 *
 * Similarly to the cholesky decomposition, the input matrix must be
 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix) and
 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix).
 *
 * This function does not check that the provided matrix is symmetric. However, given a symmetric
 * input, if the input is only positive **semi**definite `None` is returned. In the future
 * additional checks that the input is valid could be added.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::ldlt_decomposition::<f32>(&matrix)`
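 *
 * # Example
 *
 * A minimal sketch, with values chosen so the arithmetic is exact:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let a = Matrix::from(vec![
 *     vec![4.0, 2.0],
 *     vec![2.0, 2.0]
 * ]);
 * let decomposition = linear_algebra::ldlt_decomposition::<f64>(&a).unwrap();
 * // L = [1, 0; 0.5, 1] and D = [4, 0; 0, 1], so L * D * L^T = A
 * assert_eq!(decomposition.l.get(1, 0), 0.5);
 * assert_eq!(decomposition.d.get(0, 0), 4.0);
 * assert_eq!(decomposition.d.get(1, 1), 1.0);
 * ```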
 */
pub fn ldlt_decomposition<T>(matrix: &Matrix<T>) -> Option<LDLTDecomposition<T>>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
{
    let decomposition = ldlt_decomposition_less_generic::<T, _>(&TensorView::from(
        crate::interop::TensorRefMatrix::from(matrix).ok()?,
    ))?;
    Some(LDLTDecomposition {
        l: decomposition.l.into_matrix(),
        d: decomposition.d.into_matrix(),
    })
}

/**
 * The result of an `LDL^T` Decomposition of some matrix `A` such that `LDL^T = A`.
 */
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct LDLTDecompositionTensor<T> {
    pub l: Tensor<T, 2>,
    pub d: Tensor<T, 2>,
}

impl<T: std::fmt::Display + Clone> std::fmt::Display for LDLTDecompositionTensor<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "L:\n{}", &self.l)?;
        write!(f, "D:\n{}", &self.d)
    }
}

impl<T> LDLTDecompositionTensor<T> {
    /**
     * Creates an `LDL^T` Decomposition struct from two matrices without checking that `LDL^T = A`
     * or that L and D have the intended properties.
     *
     * This is provided for assistance with unit testing.
     */
    pub fn from_unchecked(l: Tensor<T, 2>, d: Tensor<T, 2>) -> LDLTDecompositionTensor<T> {
        LDLTDecompositionTensor { l, d }
    }
}

/**
 * Computes the LDL^T decomposition of a matrix. This yields a matrix `L` and a matrix `D`
 * such that for the provided matrix `A`, `L * D * L^T = A`. `L` will always be
 * unit lower triangular, ie all entries above the diagonal will be 0, and all entries along
 * the diagonal will be 1. `D` will always contain zeros except along the diagonal. This
 * decomposition is closely related to the [cholesky decomposition](cholesky_decomposition)
 * with the notable difference that it avoids taking square roots.
 *
 * Similarly to the cholesky decomposition, the input matrix must be
 * [symmetric](https://en.wikipedia.org/wiki/Hermitian_matrix) and
 * [positive definite](https://en.wikipedia.org/wiki/Definite_matrix).
 *
 * This function does not check that the provided matrix is symmetric. However, given a symmetric
 * input, if the input is only positive **semi**definite `None` is returned. In the future
 * additional checks that the input is valid could be added.
 *
 * The shapes of the `L` and `D` matrices will be the same as the input matrix.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::NumericByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::ldlt_decomposition_tensor::<f32, _, _>(&tensor)`
 */
pub fn ldlt_decomposition_tensor<T, S, I>(tensor: I) -> Option<LDLTDecompositionTensor<T>>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    ldlt_decomposition_less_generic::<T, S>(&tensor.into())
}

fn ldlt_decomposition_less_generic<T, S>(
    tensor: &TensorView<T, S, 2>,
) -> Option<LDLTDecompositionTensor<T>>
where
    T: Numeric,
    for<'a> &'a T: NumericRef<T>,
    S: TensorRef<T, 2>,
{
    // The algorithm is outlined nicely in context as Algorithm 1.2 here:
    // https://mcsweeney90.github.io/files/modified-cholesky-decomposition-and-applications.pdf
    // and also as proper code here (though a less efficient solution):
    // https://astroanddata.blogspot.com/2020/04/ldl-decomposition-with-python.html
    let shape = tensor.shape();
    if !crate::tensors::dimensions::is_square(&shape) {
        return None;
    }
    let mut lower_triangular = Tensor::empty(shape, T::zero());
    let mut diagonal = Tensor::empty(shape, T::zero());
    let mut lower_triangular_indexing = lower_triangular.index_mut();
    let mut diagonal_indexing = diagonal.index_mut();
    let tensor_indexing = tensor.index();
    let n = shape[0].1;

    for j in 0..n {
        #[rustfmt::skip]
        let sum = {
            let mut sum = T::zero();
            for k in 0..j {
                sum = &sum + (
                    lower_triangular_indexing.get_ref([j, k]) *
                    lower_triangular_indexing.get_ref([j, k]) *
                    diagonal_indexing.get_ref([k, k])
                );
            }
            sum
        };
        *diagonal_indexing.get_ref_mut([j, j]) = {
            let entry = tensor_indexing.get_ref([j, j]) - sum;
            if entry == T::zero() {
                // If input is positive definite then no diagonal will be 0. Otherwise we
                // fail the decomposition to avoid division by zero in the j < i case later.
                // Note: unlike cholesky, negatives here are fine since we can still perform
                // the calculations sensibly.
                return None;
            }
            entry
        };
        for i in j..n {
            #[rustfmt::skip]
            let x = if i == j {
                T::one()
            } else /* j < i */ {
                let sum = {
                    let mut sum = T::zero();
                    for k in 0..j {
                        sum = &sum + (
                            lower_triangular_indexing.get_ref([i, k]) *
                            lower_triangular_indexing.get_ref([j, k]) *
                            diagonal_indexing.get_ref([k, k])
                        );
                    }
                    sum
                };
                (tensor_indexing.get_ref([i, j]) - sum) *
                    (T::one() / diagonal_indexing.get_ref([j, j]))
            };
            *lower_triangular_indexing.get_ref_mut([i, j]) = x;
        }
    }
    Some(LDLTDecompositionTensor {
        l: lower_triangular,
        d: diagonal,
    })
}

/**
 * The result of a QR Decomposition of some matrix A such that `QR = A`.
 */
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct QRDecomposition<T> {
    pub q: Matrix<T>,
    pub r: Matrix<T>,
}

impl<T: std::fmt::Display + Clone> std::fmt::Display for QRDecomposition<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "Q:\n{}", &self.q)?;
        write!(f, "R:\n{}", &self.r)
    }
}

impl<T> QRDecomposition<T> {
    /**
     * Creates a QR Decomposition struct from two matrices without checking that `QR = A`
     * or that Q and R have the intended properties.
     *
     * This is provided for assistance with unit testing.
     */
    pub fn from_unchecked(q: Matrix<T>, r: Matrix<T>) -> QRDecomposition<T> {
        QRDecomposition { q, r }
    }
}

fn householder_matrix_tensor<T: Real>(vector: Vec<T>, names: [Dimension; 2]) -> Tensor<T, 2>
where
    for<'a> &'a T: RealRef<T>,
{
    // The computation steps are outlined nicely at https://en.wikipedia.org/wiki/QR_decomposition#Using_Householder_reflections
    // Supporting reference implementations are on Rosettacode https://rosettacode.org/wiki/QR_decomposition
    // we hardcode to taking the first column vector of the input matrix
    let x = Tensor::from([("r", vector.len())], vector);
    let shape = x.shape();
    let rows = shape[0].1;
    let length = x.euclidean_length();
    let a = {
        // we hardcode to wanting to zero all elements below the first
        let sign = x.first();
        if sign > T::zero() { length } else { -length }
    };
    let u = {
        // u = x - ae, where e is [1 0 0 0 ... 0]^T, and x is the column vector so
        // u is equal to x except for the first element.
        // Also, we invert the sign of a to avoid loss of significance, so u[0] becomes x[0] + a
        let mut u = x;
        let mut u_indexing = u.index_mut();
        *u_indexing.get_ref_mut([0]) = u_indexing.get_ref([0]) + a;
        u
    };
    // v = u / ||u||
    let v = {
        let length = u.euclidean_length();
        u.map(|element| element / &length)
    };
    let identity = Tensor::diagonal([(names[0], rows), (names[1], rows)], T::one());
    let two = T::one() + T::one();
    let v_column = Tensor::from([(names[0], rows), (names[1], 1)], v.iter().collect());
    let v_row = Tensor::from([(names[0], 1), (names[1], rows)], v.iter().collect());
    // I - 2 v v^T
    identity - ((v_column * v_row) * two)
}

/**
 * Computes a QR decomposition of a MxN matrix where M >= N.
 *
 * For an input matrix A, decomposes this matrix into a product of QR, where Q is an
 * [orthogonal matrix](https://en.wikipedia.org/wiki/Orthogonal_matrix) and R is an
 * upper triangular matrix (all entries below the diagonal are 0), and QR = A.
 *
 * If the input matrix has more columns than rows, returns None.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::RealByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::qr_decomposition::<f32>(&matrix)`
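 *
 * # Example
 *
 * A minimal sketch decomposing a single column; R ends up upper triangular with the
 * column's euclidean length (up to sign) in the top left:
 * ```
 * use easy_ml::matrices::Matrix;
 * use easy_ml::linear_algebra;
 *
 * let a = Matrix::from(vec![
 *     vec![3.0],
 *     vec![4.0]
 * ]);
 * let decomposition = linear_algebra::qr_decomposition::<f64>(&a).unwrap();
 * // the column [3, 4] has a euclidean length of 5
 * assert!((decomposition.r.get(0, 0).abs() - 5.0).abs() < 1e-10);
 * assert!(decomposition.r.get(1, 0).abs() < 1e-10);
 * ```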
 */
pub fn qr_decomposition<T: Real>(matrix: &Matrix<T>) -> Option<QRDecomposition<T>>
where
    for<'a> &'a T: RealRef<T>,
{
    let decomposition = qr_decomposition_less_generic::<T, _>(&TensorView::from(
        crate::interop::TensorRefMatrix::from(matrix).ok()?,
    ))?;
    Some(QRDecomposition {
        q: decomposition.q.into_matrix(),
        r: decomposition.r.into_matrix(),
    })
}

/**
 * The result of a QR Decomposition of some matrix A such that `QR = A`.
 */
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct QRDecompositionTensor<T> {
    pub q: Tensor<T, 2>,
    pub r: Tensor<T, 2>,
}

impl<T: std::fmt::Display + Clone> std::fmt::Display for QRDecompositionTensor<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        writeln!(f, "Q:\n{}", &self.q)?;
        write!(f, "R:\n{}", &self.r)
    }
}

impl<T> QRDecompositionTensor<T> {
    /**
     * Creates a QR Decomposition struct from two matrices without checking that `QR = A`
     * or that Q and R have the intended properties.
     *
     * This is provided for assistance with unit testing.
     */
    pub fn from_unchecked(q: Tensor<T, 2>, r: Tensor<T, 2>) -> QRDecompositionTensor<T> {
        QRDecompositionTensor { q, r }
    }
}

/**
 * Computes a QR decomposition of a MxN matrix where M >= N.
 *
 * For an input matrix A, decomposes this matrix into a product of QR, where Q is an
 * [orthogonal matrix](https://en.wikipedia.org/wiki/Orthogonal_matrix) and R is an
 * upper triangular matrix (all entries below the diagonal are 0), and QR = A.
 *
 * The first dimension in the Tensor's shape will be taken as the rows of the matrix, and the
 * second dimension as the columns. If you instead have columns and then rows for the Tensor's
 * shape, you should reorder the Tensor before calling this function to get the appropriate matrix.
 *
 * If the input matrix has more columns than rows, returns None.
 *
 * The shape of R will be the same as the input, and the shape of Q will be MxM
 * in relation to the MxN input matrix, with the same dimension names as the input. Hence, QR
 * yields the same shape as the input.
 *
 * # Warning
 *
 * With some uses of this function the Rust compiler gets confused about what type `T`
 * should be and you will get the error:
 * > overflow evaluating the requirement `&'a _: easy_ml::numeric::RealByValue<_, _>`
 *
 * In this case you need to manually specify the type of T by using the
 * turbofish syntax like:
 * `linear_algebra::qr_decomposition_tensor::<f32, _, _>(&tensor)`
 */
pub fn qr_decomposition_tensor<T, S, I>(tensor: I) -> Option<QRDecompositionTensor<T>>
where
    T: Real,
    for<'a> &'a T: RealRef<T>,
    I: Into<TensorView<T, S, 2>>,
    S: TensorRef<T, 2>,
{
    qr_decomposition_less_generic::<T, S>(&tensor.into())
}

fn qr_decomposition_less_generic<T, S>(
    tensor: &TensorView<T, S, 2>,
) -> Option<QRDecompositionTensor<T>>
where
    T: Real,
    for<'a> &'a T: RealRef<T>,
    S: TensorRef<T, 2>,
{
    let shape = tensor.shape();
    let names = crate::tensors::dimensions::names_of(&shape);
    let rows = shape[0].1;
    let columns = shape[1].1;
    if columns > rows {
        return None;
    }
    // The computation steps are outlined nicely at https://en.wikipedia.org/wiki/QR_decomposition#Using_Householder_reflections
    // Supporting reference implementations are at Rosettacode https://rosettacode.org/wiki/QR_decomposition
    let iterations = std::cmp::min(rows - 1, columns);
    let mut q = None;
    let mut r = tensor.map(|x| x);
    for c in 0..iterations {
        // Conceptually, on each iteration we take a minor of r to retain the bottom right of
        // the matrix, with one fewer row/column on each iteration since that will have already
        // been zeroed. However, we then immediately discard all but the first column of that
        // minor, so we skip the minor step and compute directly the first column of the minor
        // we would have taken.
        let submatrix_first_column = r
            .select([(shape[1].0, c)])
            .iter()
            .skip(c)
            .collect::<Vec<_>>();
        // compute the (M-column)x(M-column) householder matrix
        let h = householder_matrix_tensor::<T>(submatrix_first_column, names);
        // pad the h into the bottom right of an identity matrix so it is MxM
        // like so:
        // 1 0 0
        // 0 H H
        // 0 H H
        let h = {
            let h_indexing = h.index();
            let mut identity = Tensor::diagonal([(shape[0].0, rows), (shape[1].0, rows)], T::one());
            // the column we're on is the same as how many steps we inset the
            // householder matrix into the identity
            let inset = c;
            for ([i, j], x) in identity.iter_reference_mut().with_index() {
                if i >= inset && j >= inset {
                    *x = h_indexing.get([i - inset, j - inset]);
                }
            }
            identity
        };
        // R = H_n * ... H_3 * H_2 * H_1 * A
        r = &h * r;
        // Q = H_1 * H_2 * H_3 .. H_n
        match q {
            None => q = Some(h),
            Some(h_previous) => q = Some(h_previous * h),
        }
    }
    Some(QRDecompositionTensor {
        // If the input was 1x1 no iterations were performed and no householder matrices
        // were created, in which case Q is just the identity
        q: q.unwrap_or_else(|| {
            Tensor::diagonal([(shape[0].0, rows), (shape[1].0, rows)], T::one())
        }),
        r,
    })
}