single_svdlib/randomized/
mod.rs

1use crate::error::SvdLibError;
2use crate::{Diagnostics, SMat, SvdFloat, SvdRec};
3use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField};
4use ndarray::Array1;
5use nshare::IntoNdarray2;
6use rand::prelude::{Distribution, StdRng};
7use rand::SeedableRng;
8use rand_distr::Normal;
9use rayon::iter::ParallelIterator;
10use rayon::prelude::IntoParallelIterator;
11use std::ops::Mul;
12use std::time::Instant;
13
/// Strategy used to re-stabilize the sketch matrices between power iterations.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PowerIterationNormalizer {
    /// Full QR re-orthogonalization; most numerically stable, most expensive.
    QR,
    /// Despite the name, implemented here as per-column 2-norm scaling
    /// (see `normalize_columns`), a cheaper stand-in for LU normalization.
    LU,
    /// No normalization between iterations; fastest, but repeated products can
    /// lose precision as columns align with the dominant subspace.
    None,
}
20
// Matrix sizes at/above which `normalize_columns` switches from a simple serial
// loop to a rayon-parallel norm computation (both bounds must be exceeded... see
// the `&&` check in `normalize_columns`: serial is used only when rows AND cols
// are below their respective thresholds).
const PARALLEL_THRESHOLD_ROWS: usize = 5000;
const PARALLEL_THRESHOLD_COLS: usize = 1000;
23
/// Computes a truncated SVD of the sparse matrix `m` via a randomized
/// range-finder: sketch the range with a Gaussian test matrix, optionally
/// refine the subspace with power iterations, project `m` onto it, and take a
/// dense SVD of the small projected matrix.
///
/// # Arguments
/// * `m` - sparse input matrix.
/// * `target_rank` - number of singular triplets requested; clamped internally
///   to `min(nrows, ncols)`.
/// * `n_oversamples` - extra sketch columns beyond the rank, improving the
///   accuracy of the captured subspace.
/// * `n_power_iters` - number of power iterations (each is one A^T·Y followed
///   by one A·Z pass) used to sharpen the subspace.
/// * `power_iteration_normalizer` - re-orthogonalization applied between
///   power-iteration steps.
/// * `mean_center` - when true, all products behave as if each column of `m`
///   had its mean subtracted (PCA-style) without ever densifying `m`.
/// * `seed` - optional RNG seed for the Gaussian sketch; pass `Some` for
///   reproducible results.
/// * `verbose` - print progress and elapsed-time information to stdout.
///
/// # Errors
/// Returns an error if the dense SVD of the projected matrix fails to produce
/// `U` or `V^T`.
pub fn randomized_svd<T, M>(
    m: &M,
    target_rank: usize,
    n_oversamples: usize,
    n_power_iters: usize,
    power_iteration_normalizer: PowerIterationNormalizer,
    mean_center: bool,
    seed: Option<u64>,
    verbose: bool,
) -> anyhow::Result<SvdRec<T>>
where
    T: SvdFloat + RealField,
    M: SMat<T> + std::marker::Sync,
    T: ComplexField,
{
    let start = Instant::now();
    let m_rows = m.nrows();
    let m_cols = m.ncols();

    // The achievable rank can never exceed the smaller matrix dimension.
    let rank = target_rank.min(m_rows.min(m_cols));
    // Sketch width: requested rank plus oversampling columns.
    let l = rank + n_oversamples;

    // Column means are computed once and threaded through every product below,
    // so mean centering never materializes a dense centered matrix.
    let column_means: Option<DVector<T>> = if mean_center {
        if verbose {
            println!("SVD | Randomized | Computing column means....");
        }
        Some(DVector::from(m.compute_column_means()))
    } else {
        None
    };
    if verbose && mean_center {
        println!(
            "SVD | Randomized | Computed column means, took: {:?} of total running time",
            start.elapsed()
        );
    }

    // Gaussian test matrix Omega (m_cols x l).
    let omega = generate_random_matrix(m_cols, l, seed);

    // Y = A * Omega: initial sketch of the range of A.
    let mut y = DMatrix::<T>::zeros(m_rows, l);
    if verbose {
        println!("SVD | Randomized | Multiplying m with omega matrix....");
    }
    multiply_matrix_centered(m, &omega, &mut y, false, &column_means);
    if verbose {
        println!(
            "SVD | Randomized | Multiplication done, took: {:?} of total running time",
            start.elapsed()
        );
    }
    if verbose {
        println!("SVD | Randomized | Starting power iterations....");
    }
    if n_power_iters > 0 {
        // Z holds the intermediate A^T * Y product (m_cols x l).
        let mut z = DMatrix::<T>::zeros(m_cols, l);

        for i in 0..n_power_iters {
            if verbose {
                println!(
                    "SVD | Randomized | Running power-iteration: {:?}, current time: {:?}",
                    i,
                    start.elapsed()
                );
            }
            // Forward step: Z = A^T * Y (transpose_sparse = true).
            multiply_matrix_centered(m, &y, &mut z, true, &column_means);
            if verbose {
                println!(
                    "SVD | Randomized | Forward Multiplication {:?}",
                    start.elapsed()
                );
            }
            match power_iteration_normalizer {
                PowerIterationNormalizer::QR => {
                    let qr = z.qr();
                    z = qr.q();
                    // After QR normalization, z has fewer columns, so we need to resize y
                    y = DMatrix::<T>::zeros(m_rows, z.ncols());
                }
                PowerIterationNormalizer::LU => {
                    normalize_columns(&mut z);
                }
                PowerIterationNormalizer::None => {}
            }
            if verbose {
                println!(
                    "SVD | Randomized | Power Iteration Normalization Forward-Step {:?}",
                    start.elapsed()
                );
            }

            // Backward step: Y = A * Z, completing one (A A^T) application.
            multiply_matrix_centered(m, &z, &mut y, false, &column_means);
            if verbose {
                println!(
                    "SVD | Randomized | Backward Multiplication {:?}",
                    start.elapsed()
                );
            }
            match power_iteration_normalizer {
                PowerIterationNormalizer::QR => {
                    let qr = y.qr();
                    y = qr.q();
                }
                PowerIterationNormalizer::LU => normalize_columns(&mut y),
                PowerIterationNormalizer::None => {}
            }
            if verbose {
                println!(
                    "SVD | Randomized | Power Iteration Normalization Backward-Step {:?}",
                    start.elapsed()
                );
            }
        }
    }
    if verbose {
        println!(
            "SVD | Randomized | Running QR-Normalization after Power-Iterations {:?}",
            start.elapsed()
        );
    }
    // Final orthonormal basis Q for the approximate range of A.
    let qr = y.qr();
    let y = qr.q();
    if verbose {
        println!(
            "SVD | Randomized | Finished QR-Normalization after Power-Iterations {:?}",
            start.elapsed()
        );
    }

    // B = Q^T * A: small (l x m_cols) dense projection of A.
    let mut b = DMatrix::<T>::zeros(y.ncols(), m_cols);
    multiply_transposed_by_matrix_centered(&y, m, &mut b, &column_means);
    if verbose {
        println!(
            "SVD | Randomized | Transposed Matrix Multiplication {:?}",
            start.elapsed()
        );
    }
    // Dense SVD of the projection; U and V^T are both requested.
    let svd = b.svd(true, true);
    if verbose {
        println!(
            "SVD | Randomized | Running Singular Value Decomposition, took {:?}",
            start.elapsed()
        );
    }
    let u_b = svd
        .u
        .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?;
    let singular_values = svd.singular_values;
    let vt = svd
        .v_t
        .ok_or_else(|| SvdLibError::Las2Error("SVD V_t computation failed".to_string()))?;

    // Lift the projected left vectors back to the original row space: U = Q * U_b.
    let u = y.mul(&u_b);
    let actual_rank = target_rank.min(singular_values.len());

    // Truncate all factors to the achieved rank before conversion.
    let u_subset = u.columns(0, actual_rank);
    let s = convert_singular_values(
        <DVector<T>>::from(singular_values.rows(0, actual_rank)),
        actual_rank,
    );
    let vt_subset = vt.rows(0, actual_rank).into_owned();
    let u = u_subset.into_owned().into_ndarray2();
    let vt = vt_subset.into_ndarray2();
    Ok(SvdRec {
        d: actual_rank,
        u,
        s,
        vt,
        diagnostics: create_diagnostics(
            m,
            actual_rank,
            target_rank,
            n_power_iters,
            // NOTE(review): the u64 seed is truncated to u32 here, and an
            // unseeded run is recorded as seed 0.
            seed.unwrap_or(0) as u32,
        ),
    })
}
200
201fn convert_singular_values<T: SvdFloat + ComplexField>(
202    values: DVector<T::RealField>,
203    size: usize,
204) -> Array1<T> {
205    let mut array = Array1::zeros(size);
206
207    for i in 0..size {
208        array[i] = T::from_real(values[i].clone());
209    }
210
211    array
212}
213
/// Assembles a [`Diagnostics`] record for a randomized SVD run.
///
/// Several fields exist only for parity with the Lanczos code path and carry
/// placeholder values here: `lanczos_steps` is fixed at 0 (no Lanczos steps are
/// performed), and `end_interval` / `kappa` are filled with the conventional
/// constants rather than measured quantities.
fn create_diagnostics<T, M: SMat<T>>(
    a: &M,
    d: usize,                 // achieved rank of the decomposition
    target_rank: usize,       // rank originally requested by the caller
    power_iterations: usize,  // number of power iterations that were run
    seed: u32,
) -> Diagnostics<T>
where
    T: SvdFloat,
{
    Diagnostics {
        non_zero: a.nnz(),
        dimensions: target_rank,
        iterations: power_iterations,
        transposed: false,
        lanczos_steps: 0, // we dont do that
        ritz_values_stabilized: d,
        significant_values: d,
        singular_values: d,
        end_interval: [T::from(-1e-30).unwrap(), T::from(1e-30).unwrap()],
        kappa: T::from(1e-6).unwrap(),
        random_seed: seed,
    }
}
238
239fn normalize_columns<T: SvdFloat + RealField + Send + Sync>(matrix: &mut DMatrix<T>) {
240    let rows = matrix.nrows();
241    let cols = matrix.ncols();
242
243    if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS {
244        for j in 0..cols {
245            let mut norm = T::zero();
246
247            // Calculate column norm
248            for i in 0..rows {
249                norm += ComplexField::powi(matrix[(i, j)], 2);
250            }
251            norm = ComplexField::sqrt(norm);
252
253            if norm > T::from_f64(1e-10).unwrap() {
254                let scale = T::one() / norm;
255                for i in 0..rows {
256                    matrix[(i, j)] *= scale;
257                }
258            }
259        }
260        return;
261    }
262
263    let norms: Vec<T> = (0..cols)
264        .into_par_iter()
265        .map(|j| {
266            let mut norm = T::zero();
267            for i in 0..rows {
268                let val = unsafe { *matrix.get_unchecked((i, j)) };
269                norm += ComplexField::powi(val, 2);
270            }
271            ComplexField::sqrt(norm)
272        })
273        .collect();
274
275    let scales: Vec<(usize, T)> = norms
276        .into_iter()
277        .enumerate()
278        .filter_map(|(j, norm)| {
279            if norm > T::from_f64(1e-10).unwrap() {
280                Some((j, T::one() / norm))
281            } else {
282                None // Skip columns with too small norms
283            }
284        })
285        .collect();
286
287    scales.iter().for_each(|(j, scale)| {
288        for i in 0..rows {
289            let value = matrix.get_mut((i, *j)).unwrap();
290            *value = value.clone() * scale.clone();
291        }
292    });
293}
294
295// ----------------------------------------
296// Utils Functions
297// ----------------------------------------
298
299fn generate_random_matrix<T: SvdFloat + RealField>(
300    rows: usize,
301    cols: usize,
302    seed: Option<u64>,
303) -> DMatrix<T> {
304    let mut rng = match seed {
305        Some(s) => StdRng::seed_from_u64(s),
306        None => StdRng::seed_from_u64(0),
307    };
308
309    let normal = Normal::new(0.0, 1.0).unwrap();
310    DMatrix::from_fn(rows, cols, |_, _| {
311        T::from_f64(normal.sample(&mut rng)).unwrap()
312    })
313}
314
/// Thin wrapper: computes `sparse * dense` (or `sparse^T * dense` when
/// `transpose_sparse` is true) into the preallocated `result` buffer by
/// delegating to the `SMat` implementation.
fn multiply_matrix<T: SvdFloat, M: SMat<T>>(
    sparse: &M,
    dense: &DMatrix<T>,
    result: &mut DMatrix<T>,
    transpose_sparse: bool,
) {
    sparse.multiply_with_dense(dense, result, transpose_sparse)
}
323
/// Thin wrapper: computes `q^T * sparse` into the preallocated `result`
/// buffer by delegating to the `SMat` implementation.
fn multiply_transposed_by_matrix<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
    q: &DMatrix<T>,
    sparse: &M,
    result: &mut DMatrix<T>,
) {
    sparse.multiply_transposed_by_dense(q, result);
}
331
/// Resolves the sign ambiguity of an SVD in place: each singular-vector pair
/// can be negated together without changing the decomposition, so signs are
/// fixed deterministically by making the largest-magnitude entry of each
/// decision vector positive (the same convention scikit-learn uses).
///
/// * `u_based_decision = true`: signs are chosen per **column** of `u`; the
///   same flip is applied to the corresponding **row** of `v` (i.e. `v` is
///   expected to hold V^T).
/// * `u_based_decision = false`: signs are chosen per **row** of `v` and
///   applied to the corresponding **column** of `u`.
///
/// # Errors
/// Returns an error if both matrices are `None`, or if the matrix required for
/// the sign decision is `None`.
pub fn svd_flip<T: SvdFloat + 'static>(
    u: Option<&mut DMatrix<T>>,
    v: Option<&mut DMatrix<T>>,
    u_based_decision: bool,
) -> Result<(), SvdLibError> {
    if u.is_none() && v.is_none() {
        return Err(SvdLibError::Las2Error(
            "Both u and v cannot be None".to_string(),
        ));
    }

    if u_based_decision {
        if u.is_none() {
            return Err(SvdLibError::Las2Error(
                "u cannot be None when u_based_decision is true".to_string(),
            ));
        }

        let u = u.unwrap();
        let ncols = u.ncols();
        let nrows = u.nrows();

        // One sign per column of u; defaults to +1.
        let mut signs = DVector::from_element(ncols, T::one());

        // Decide each column's sign from its largest-magnitude entry.
        for j in 0..ncols {
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for i in 0..nrows {
                let abs_val = num_traits::Float::abs(u[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = i;
                }
            }

            if u[(max_idx, j)] < T::zero() {
                signs[j] = -T::one();
            }
        }

        // Apply the flips to u's columns.
        for j in 0..ncols {
            for i in 0..nrows {
                u[(i, j)] *= signs[j];
            }
        }

        // Mirror the flips onto v's rows (row k of V^T pairs with column k of
        // u); `min(signs.len())` guards against a v with fewer rows than u has
        // columns.
        if let Some(v) = v {
            let v_nrows = v.nrows();
            let v_ncols = v.ncols();

            for i in 0..v_nrows.min(signs.len()) {
                for j in 0..v_ncols {
                    v[(i, j)] *= signs[i];
                }
            }
        }
    } else {
        if v.is_none() {
            return Err(SvdLibError::Las2Error(
                "v cannot be None when u_based_decision is false".to_string(),
            ));
        }

        let v = v.unwrap();
        let nrows = v.nrows();
        let ncols = v.ncols();

        // One sign per row of v (V^T layout); defaults to +1.
        let mut signs = DVector::from_element(nrows, T::one());

        // Decide each row's sign from its largest-magnitude entry.
        for i in 0..nrows {
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for j in 0..ncols {
                let abs_val = num_traits::Float::abs(v[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = j;
                }
            }

            if v[(i, max_idx)] < T::zero() {
                signs[i] = -T::one();
            }
        }

        // Apply the flips to v's rows.
        for i in 0..nrows {
            for j in 0..ncols {
                v[(i, j)] *= signs[i];
            }
        }

        // Mirror the flips onto u's columns; `min(signs.len())` guards against
        // a u with more columns than v has rows.
        if let Some(u) = u {
            let u_nrows = u.nrows();
            let u_ncols = u.ncols();

            for j in 0..u_ncols.min(signs.len()) {
                for i in 0..u_nrows {
                    u[(i, j)] *= signs[j];
                }
            }
        }
    }

    Ok(())
}
439
440fn multiply_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
441    sparse: &M,
442    dense: &DMatrix<T>,
443    result: &mut DMatrix<T>,
444    transpose_sparse: bool,
445    column_means: &Option<DVector<T>>,
446) {
447    if column_means.is_none() {
448        multiply_matrix(sparse, dense, result, transpose_sparse);
449        return;
450    }
451
452    let means = column_means.as_ref().unwrap();
453    sparse.multiply_with_dense_centered(dense, result, transpose_sparse, means)
454}
455
456fn multiply_transposed_by_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
457    q: &DMatrix<T>,
458    sparse: &M,
459    result: &mut DMatrix<T>,
460    column_means: &Option<DVector<T>>,
461) {
462     if column_means.is_none() {
463        multiply_transposed_by_matrix(q, sparse, result);
464        return;
465    }
466
467    let means = column_means.as_ref().unwrap();
468    sparse.multiply_transposed_by_dense_centered(q, result, means);
469}
470
#[cfg(test)]
mod randomized_svd_tests {
    use super::*;
    use crate::randomized::{randomized_svd, PowerIterationNormalizer};
    use nalgebra_sparse::coo::CooMatrix;
    use nalgebra_sparse::CsrMatrix;
    use ndarray::Array2;
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use rayon::ThreadPoolBuilder;
    use std::sync::Once;

    static INIT: Once = Once::new();

    /// Builds the global rayon pool exactly once across all tests in this module.
    fn setup_thread_pool() {
        INIT.call_once(|| {
            ThreadPoolBuilder::new()
                .num_threads(16)
                .build_global()
                .expect("Failed to build global thread pool");

            println!("Initialized thread pool with {} threads", 16);
        });
    }

    /// Creates a seeded `rows x cols` COO matrix with roughly `density`
    /// fraction of non-zeros, uniformly drawn from (-10, 10); near-zero draws
    /// are rejected so every stored entry is a genuine non-zero.
    fn create_sparse_matrix(
        rows: usize,
        cols: usize,
        density: f64,
    ) -> nalgebra_sparse::coo::CooMatrix<f64> {
        use std::collections::HashSet;

        let mut coo = nalgebra_sparse::coo::CooMatrix::new(rows, cols);

        let mut rng = StdRng::seed_from_u64(42);

        // At least one entry, even for tiny density values.
        let nnz = (rows as f64 * cols as f64 * density).round() as usize;
        let nnz = nnz.max(1);

        // Track occupied positions so each (i, j) is pushed at most once.
        let mut positions = HashSet::new();

        while positions.len() < nnz {
            // Unified on `random_range` (rand >= 0.9 naming); the module
            // previously mixed it with the deprecated `gen_range` alias.
            let i = rng.random_range(0..rows);
            let j = rng.random_range(0..cols);

            if positions.insert((i, j)) {
                let val = loop {
                    let v: f64 = rng.random_range(-10.0..10.0);
                    if v.abs() > 1e-10 {
                        break v;
                    }
                };

                coo.push(i, j, val);
            }
        }

        let actual_density = coo.nnz() as f64 / (rows as f64 * cols as f64);
        println!("Created sparse matrix: {} x {}", rows, cols);
        println!("  - Requested density: {:.6}", density);
        println!("  - Actual density: {:.6}", actual_density);
        println!("  - Sparsity: {:.4}%", (1.0 - actual_density) * 100.0);
        println!("  - Non-zeros: {}", coo.nnz());

        coo
    }

    /// The dominant singular values from the randomized SVD should agree with
    /// the Lanczos reference implementation to within a loose relative tolerance.
    #[test]
    fn test_randomized_svd_accuracy() {
        setup_thread_pool();

        let coo = create_sparse_matrix(500, 40, 0.1);

        let csr = CsrMatrix::from(&coo);

        // Reference decomposition is only read below, so no `mut` needed.
        let std_svd = crate::lanczos::svd_dim_seed(&csr, 10, 42).unwrap();

        let rand_svd = randomized_svd(
            &csr,
            10,
            5,
            4,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            true,
        )
        .unwrap();

        assert_eq!(rand_svd.d, 10, "Expected rank of 10");

        let rel_tol = 0.4;
        let compare_count = std::cmp::min(std_svd.d, rand_svd.d);
        println!("Standard SVD has {} dimensions", std_svd.d);
        println!("Randomized SVD has {} dimensions", rand_svd.d);

        for i in 0..compare_count {
            let rel_diff = (std_svd.s[i] - rand_svd.s[i]).abs() / std_svd.s[i];
            println!(
                "Singular value {}: standard={}, randomized={}, rel_diff={}",
                i, std_svd.s[i], rand_svd.s[i], rel_diff
            );
            assert!(
                rel_diff < rel_tol,
                "Dominant singular value {} differs too much: rel diff = {}, standard = {}, randomized = {}",
                i, rel_diff, std_svd.s[i], rand_svd.s[i]
            );
        }
    }

    /// Exercises both the centered and uncentered code paths end-to-end on a
    /// low-rank matrix with large per-column offsets.
    #[test]
    fn test_randomized_svd_with_mean_centering() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(30, 10);
        let mut rng = StdRng::seed_from_u64(123);

        let column_means: Vec<f64> = (0..10).map(|i| i as f64 * 2.0).collect();

        // Build a rank-3 matrix plus per-column offsets and small noise.
        let mut u = vec![vec![0.0; 3]; 30]; // 3 factors
        let mut v = vec![vec![0.0; 3]; 10];

        for i in 0..30 {
            for j in 0..3 {
                u[i][j] = rng.random_range(-1.0..1.0);
            }
        }

        for i in 0..10 {
            for j in 0..3 {
                v[i][j] = rng.random_range(-1.0..1.0);
            }
        }

        for i in 0..30 {
            for j in 0..10 {
                let mut val = 0.0;
                for k in 0..3 {
                    val += u[i][k] * v[j][k];
                }
                val = val + column_means[j] + rng.random_range(-0.1..0.1);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let svd_no_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        )
        .unwrap();

        let svd_with_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            true,
            Some(42),
            false,
        )
        .unwrap();

        println!("Singular values without centering: {:?}", svd_no_center.s);
        println!("Singular values with centering: {:?}", svd_with_center.s);
    }

    /// Sanity-checks shapes and spectrum ordering on a larger sparse input.
    #[test]
    fn test_randomized_svd_large_sparse() {
        setup_thread_pool();

        let test_matrix = create_sparse_matrix(5000, 1000, 0.01);

        let csr = CsrMatrix::from(&test_matrix);

        let result = randomized_svd(
            &csr,
            20,
            10,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        );

        assert!(
            result.is_ok(),
            "Randomized SVD failed on large sparse matrix: {:?}",
            result.err().unwrap()
        );

        let svd = result.unwrap();
        assert_eq!(svd.d, 20, "Expected rank of 20");
        assert_eq!(svd.u.ncols(), 20, "Expected 20 left singular vectors");
        assert_eq!(svd.u.nrows(), 5000, "Expected 5000 columns in U transpose");
        assert_eq!(svd.vt.nrows(), 20, "Expected 20 right singular vectors");
        assert_eq!(svd.vt.ncols(), 1000, "Expected 1000 columns in V transpose");

        // Check positivity for every value (the previous loop started at index
        // 1 and never verified s[0]), and ordering between neighbors.
        assert!(
            svd.s.iter().all(|&s| s > 0.0),
            "Singular values should be positive"
        );
        for i in 1..svd.s.len() {
            assert!(
                svd.s[i - 1] >= svd.s[i],
                "Singular values should be in descending order"
            );
        }
    }

    /// Reconstruction error should improve (for some setting) as power
    /// iterations are added on a noisy low-rank matrix.
    #[test]
    fn test_power_iteration_impact() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(100, 50);
        let mut rng = StdRng::seed_from_u64(987);

        let mut u = vec![vec![0.0; 10]; 100];
        let mut v = vec![vec![0.0; 10]; 50];

        for i in 0..100 {
            for j in 0..10 {
                u[i][j] = rng.random_range(-1.0..1.0);
            }
        }

        for i in 0..50 {
            for j in 0..10 {
                v[i][j] = rng.random_range(-1.0..1.0);
            }
        }

        for i in 0..100 {
            for j in 0..50 {
                let mut val = 0.0;
                for k in 0..10 {
                    val += u[i][k] * v[j][k];
                }
                val += rng.random_range(-0.01..0.01);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let powers = [0, 1, 3, 5];
        let mut errors = Vec::new();

        // Dense copy for computing the Frobenius reconstruction error.
        let mut dense_mat = Array2::<f64>::zeros((100, 50));
        for (i, j, val) in csr.triplet_iter() {
            dense_mat[[i, j]] = *val;
        }
        let matrix_norm = dense_mat.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

        for &power in &powers {
            let svd = randomized_svd(
                &csr,
                10,
                5,
                power,
                PowerIterationNormalizer::QR,
                false,
                Some(42),
                false,
            )
            .unwrap();

            let recon = svd.recompose();
            let mut error = 0.0;

            for i in 0..100 {
                for j in 0..50 {
                    error += (dense_mat[[i, j]] - recon[[i, j]]).powi(2);
                }
            }

            error = error.sqrt() / matrix_norm;
            errors.push(error);

            println!("Power iterations: {}, Relative error: {}", power, error);
        }

        // At least one non-zero iteration count should beat the zero-iteration
        // baseline by a clear margin.
        let mut improved = false;
        for i in 1..errors.len() {
            if errors[i] < errors[0] * 0.9 {
                improved = true;
                break;
            }
        }

        assert!(
            improved,
            "Power iterations did not improve accuracy as expected"
        );
    }
}