// single_svdlib/randomized/mod.rs

1use crate::error::SvdLibError;
2use crate::{Diagnostics, SMat, SvdFloat, SvdRec};
3use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField};
4use ndarray::Array1;
5use nshare::IntoNdarray2;
6use rand::prelude::{Distribution, StdRng};
7use rand::SeedableRng;
8use rand_distr::Normal;
9use rayon::iter::ParallelIterator;
10use rayon::prelude::IntoParallelIterator;
11use std::ops::Mul;
12use std::time::Instant;
13
/// Strategy used to re-orthonormalize the sketch between power iterations.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PowerIterationNormalizer {
    /// Full QR decomposition each half-step: most numerically stable, most expensive.
    QR,
    /// Despite the name, implemented as cheap per-column norm scaling
    /// (see `normalize_columns`), not an LU factorization.
    LU,
    /// No normalization: fastest, but columns can collapse/overflow over many iterations.
    None,
}
20
// Below BOTH thresholds, `normalize_columns` runs serially; otherwise it uses
// rayon to compute column norms in parallel.
const PARALLEL_THRESHOLD_ROWS: usize = 5000;
const PARALLEL_THRESHOLD_COLS: usize = 1000;
23
24pub fn randomized_svd<T, M>(
25    m: &M,
26    target_rank: usize,
27    n_oversamples: usize,
28    n_power_iters: usize,
29    power_iteration_normalizer: PowerIterationNormalizer,
30    mean_center: bool,
31    seed: Option<u64>,
32    verbose: bool,
33) -> anyhow::Result<SvdRec<T>>
34where
35    T: SvdFloat + RealField,
36    M: SMat<T> + std::marker::Sync,
37    T: ComplexField,
38{
39    let start = Instant::now();
40    let m_rows = m.nrows();
41    let m_cols = m.ncols();
42
43    let rank = target_rank.min(m_rows.min(m_cols));
44    let l = rank + n_oversamples;
45
46    let column_means: Option<DVector<T>> = if mean_center {
47        if verbose {
48            println!("SVD | Randomized | Computing column means....");
49        }
50        Some(DVector::from(m.compute_column_means()))
51    } else {
52        None
53    };
54    if verbose && mean_center {
55        println!(
56            "SVD | Randomized | Computed column means, took: {:?} of total running time",
57            start.elapsed()
58        );
59    }
60
61    let omega = generate_random_matrix(m_cols, l, seed);
62
63    let mut y = DMatrix::<T>::zeros(m_rows, l);
64    if verbose {
65        println!("SVD | Randomized | Multiplying m with omega matrix....");
66    }
67    multiply_matrix_centered(m, &omega, &mut y, false, &column_means);
68    if verbose {
69        println!(
70            "SVD | Randomized | Multiplication done, took: {:?} of total running time",
71            start.elapsed()
72        );
73    }
74    if verbose {
75        println!("SVD | Randomized | Starting power iterations....");
76    }
77    if n_power_iters > 0 {
78        let mut z = DMatrix::<T>::zeros(m_cols, l);
79
80        for i in 0..n_power_iters {
81            if verbose {
82                println!(
83                    "SVD | Randomized | Running power-iteration: {:?}, current time: {:?}",
84                    i,
85                    start.elapsed()
86                );
87            }
88            multiply_matrix_centered(m, &y, &mut z, true, &column_means);
89            if verbose {
90                println!(
91                    "SVD | Randomized | Forward Multiplication {:?}",
92                    start.elapsed()
93                );
94            }
95            match power_iteration_normalizer {
96                PowerIterationNormalizer::QR => {
97                    let qr = z.qr();
98                    z = qr.q();
99                }
100                PowerIterationNormalizer::LU => {
101                    normalize_columns(&mut z);
102                }
103                PowerIterationNormalizer::None => {}
104            }
105            if verbose {
106                println!(
107                    "SVD | Randomized | Power Iteration Normalization Forward-Step {:?}",
108                    start.elapsed()
109                );
110            }
111
112            multiply_matrix_centered(m, &z, &mut y, false, &column_means);
113            if verbose {
114                println!(
115                    "SVD | Randomized | Backward Multiplication {:?}",
116                    start.elapsed()
117                );
118            }
119            match power_iteration_normalizer {
120                PowerIterationNormalizer::QR => {
121                    let qr = y.qr();
122                    y = qr.q();
123                }
124                PowerIterationNormalizer::LU => normalize_columns(&mut y),
125                PowerIterationNormalizer::None => {}
126            }
127            if verbose {
128                println!(
129                    "SVD | Randomized | Power Iteration Normalization Backward-Step {:?}",
130                    start.elapsed()
131                );
132            }
133        }
134    }
135    if verbose {
136        println!(
137            "SVD | Randomized | Running QR-Normalization after Power-Iterations {:?}",
138            start.elapsed()
139        );
140    }
141    let qr = y.qr();
142    let y = qr.q();
143    if verbose {
144        println!(
145            "SVD | Randomized | Finished QR-Normalization after Power-Iterations {:?}",
146            start.elapsed()
147        );
148    }
149
150    let mut b = DMatrix::<T>::zeros(y.ncols(), m_cols);
151    multiply_transposed_by_matrix_centered(&y, m, &mut b, &column_means);
152    if verbose {
153        println!(
154            "SVD | Randomized | Transposed Matrix Multiplication {:?}",
155            start.elapsed()
156        );
157    }
158    let svd = b.svd(true, true);
159    if verbose {
160        println!(
161            "SVD | Randomized | Running Singular Value Decomposition, took {:?}",
162            start.elapsed()
163        );
164    }
165    let u_b = svd
166        .u
167        .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?;
168    let singular_values = svd.singular_values;
169    let vt = svd
170        .v_t
171        .ok_or_else(|| SvdLibError::Las2Error("SVD V_t computation failed".to_string()))?;
172
173    let u = y.mul(&u_b);
174    let actual_rank = target_rank.min(singular_values.len());
175
176    let u_subset = u.columns(0, actual_rank);
177    let s = convert_singular_values(
178        <DVector<T>>::from(singular_values.rows(0, actual_rank)),
179        actual_rank,
180    );
181    let vt_subset = vt.rows(0, actual_rank).into_owned();
182    let u = u_subset.into_owned().into_ndarray2();
183    let vt = vt_subset.into_ndarray2();
184    Ok(SvdRec {
185        d: actual_rank,
186        u,
187        s,
188        vt,
189        diagnostics: create_diagnostics(
190            m,
191            actual_rank,
192            target_rank,
193            n_power_iters,
194            seed.unwrap_or(0) as u32,
195        ),
196    })
197}
198
199fn convert_singular_values<T: SvdFloat + ComplexField>(
200    values: DVector<T::RealField>,
201    size: usize,
202) -> Array1<T> {
203    let mut array = Array1::zeros(size);
204
205    for i in 0..size {
206        array[i] = T::from_real(values[i].clone());
207    }
208
209    array
210}
211
/// Assembles the `Diagnostics` record for a completed randomized SVD run.
///
/// * `a` - the input matrix (only its nnz count is read).
/// * `d` - the rank actually achieved.
/// * `target_rank` - the rank that was requested.
/// * `power_iterations` - number of power iterations performed.
/// * `seed` - RNG seed used for the sketch (truncated to 32 bits by the caller).
///
/// Lanczos-specific fields are filled with placeholder values, since the
/// randomized path performs no Lanczos steps.
fn create_diagnostics<T, M: SMat<T>>(
    a: &M,
    d: usize,
    target_rank: usize,
    power_iterations: usize,
    seed: u32,
) -> Diagnostics<T>
where
    T: SvdFloat,
{
    Diagnostics {
        non_zero: a.nnz(),
        dimensions: target_rank,
        iterations: power_iterations,
        transposed: false,
        lanczos_steps: 0, // we dont do that
        ritz_values_stabilized: d,
        significant_values: d,
        singular_values: d,
        // Placeholder interval/kappa; not produced by the randomized algorithm.
        end_interval: [T::from(-1e-30).unwrap(), T::from(1e-30).unwrap()],
        kappa: T::from(1e-6).unwrap(),
        random_seed: seed,
    }
}
236
237fn normalize_columns<T: SvdFloat + RealField + Send + Sync>(matrix: &mut DMatrix<T>) {
238    let rows = matrix.nrows();
239    let cols = matrix.ncols();
240
241    if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS {
242        for j in 0..cols {
243            let mut norm = T::zero();
244
245            // Calculate column norm
246            for i in 0..rows {
247                norm += ComplexField::powi(matrix[(i, j)], 2);
248            }
249            norm = ComplexField::sqrt(norm);
250
251            if norm > T::from_f64(1e-10).unwrap() {
252                let scale = T::one() / norm;
253                for i in 0..rows {
254                    matrix[(i, j)] *= scale;
255                }
256            }
257        }
258        return;
259    }
260
261    let norms: Vec<T> = (0..cols)
262        .into_par_iter()
263        .map(|j| {
264            let mut norm = T::zero();
265            for i in 0..rows {
266                let val = unsafe { *matrix.get_unchecked((i, j)) };
267                norm += ComplexField::powi(val, 2);
268            }
269            ComplexField::sqrt(norm)
270        })
271        .collect();
272
273    let scales: Vec<(usize, T)> = norms
274        .into_iter()
275        .enumerate()
276        .filter_map(|(j, norm)| {
277            if norm > T::from_f64(1e-10).unwrap() {
278                Some((j, T::one() / norm))
279            } else {
280                None // Skip columns with too small norms
281            }
282        })
283        .collect();
284
285    scales.iter().for_each(|(j, scale)| {
286        for i in 0..rows {
287            let value = matrix.get_mut((i, *j)).unwrap();
288            *value = value.clone() * scale.clone();
289        }
290    });
291}
292
293// ----------------------------------------
294// Utils Functions
295// ----------------------------------------
296
297fn generate_random_matrix<T: SvdFloat + RealField>(
298    rows: usize,
299    cols: usize,
300    seed: Option<u64>,
301) -> DMatrix<T> {
302    let mut rng = match seed {
303        Some(s) => StdRng::seed_from_u64(s),
304        None => StdRng::seed_from_u64(0),
305    };
306
307    let normal = Normal::new(0.0, 1.0).unwrap();
308    DMatrix::from_fn(rows, cols, |_, _| {
309        T::from_f64(normal.sample(&mut rng)).unwrap()
310    })
311}
312
/// Writes `sparse * dense` (or `sparseᵀ * dense` when `transpose_sparse` is true)
/// into `result`. Thin wrapper that dispatches to the `SMat` implementation.
fn multiply_matrix<T: SvdFloat, M: SMat<T>>(
    sparse: &M,
    dense: &DMatrix<T>,
    result: &mut DMatrix<T>,
    transpose_sparse: bool,
) {
    sparse.multiply_with_dense(dense, result, transpose_sparse)
}
321
/// Writes `qᵀ * sparse` into `result`. Thin wrapper that dispatches to the
/// `SMat` implementation.
fn multiply_transposed_by_matrix<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
    q: &DMatrix<T>,
    sparse: &M,
    result: &mut DMatrix<T>,
) {
    sparse.multiply_transposed_by_dense(q, result);
}
329
/// Resolves the sign ambiguity of an SVD in place so results are deterministic:
/// each singular vector is flipped so that its entry of largest absolute value
/// is positive (the convention used by scikit-learn's `svd_flip`).
///
/// * `u` - left singular vectors (columns), optional.
/// * `v` - right singular vectors; rows are components (i.e. `vᵀ`), optional.
/// * `u_based_decision` - if true, signs are chosen from `u`'s columns and then
///   mirrored onto `v`; otherwise chosen from `v`'s rows and mirrored onto `u`.
///
/// # Errors
/// Returns an error if both inputs are `None`, or if the matrix the decision is
/// based on is `None`.
pub fn svd_flip<T: SvdFloat + 'static>(
    u: Option<&mut DMatrix<T>>,
    v: Option<&mut DMatrix<T>>,
    u_based_decision: bool,
) -> Result<(), SvdLibError> {
    if u.is_none() && v.is_none() {
        return Err(SvdLibError::Las2Error(
            "Both u and v cannot be None".to_string(),
        ));
    }

    if u_based_decision {
        if u.is_none() {
            return Err(SvdLibError::Las2Error(
                "u cannot be None when u_based_decision is true".to_string(),
            ));
        }

        let u = u.unwrap();
        let ncols = u.ncols();
        let nrows = u.nrows();

        // One sign per component (column of u).
        let mut signs = DVector::from_element(ncols, T::one());

        // Find each column's entry of largest magnitude; strict `>` keeps the
        // FIRST maximal index on ties (matters for determinism).
        for j in 0..ncols {
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for i in 0..nrows {
                let abs_val = num_traits::Float::abs(u[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = i;
                }
            }

            if u[(max_idx, j)] < T::zero() {
                signs[j] = -T::one();
            }
        }

        // Apply the chosen signs to u's columns.
        for j in 0..ncols {
            for i in 0..nrows {
                u[(i, j)] *= signs[j];
            }
        }

        // Mirror the flips onto v's rows (v holds components as rows);
        // `.min(signs.len())` guards against a dimension mismatch.
        if let Some(v) = v {
            let v_nrows = v.nrows();
            let v_ncols = v.ncols();

            for i in 0..v_nrows.min(signs.len()) {
                for j in 0..v_ncols {
                    v[(i, j)] *= signs[i];
                }
            }
        }
    } else {
        if v.is_none() {
            return Err(SvdLibError::Las2Error(
                "v cannot be None when u_based_decision is false".to_string(),
            ));
        }

        let v = v.unwrap();
        let nrows = v.nrows();
        let ncols = v.ncols();

        // One sign per component (row of v).
        let mut signs = DVector::from_element(nrows, T::one());

        // Find each row's entry of largest magnitude (first maximal index on ties).
        for i in 0..nrows {
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for j in 0..ncols {
                let abs_val = num_traits::Float::abs(v[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = j;
                }
            }

            if v[(i, max_idx)] < T::zero() {
                signs[i] = -T::one();
            }
        }

        // Apply the chosen signs to v's rows.
        for i in 0..nrows {
            for j in 0..ncols {
                v[(i, j)] *= signs[i];
            }
        }

        // Mirror the flips onto u's columns, guarded against dimension mismatch.
        if let Some(u) = u {
            let u_nrows = u.nrows();
            let u_ncols = u.ncols();

            for j in 0..u_ncols.min(signs.len()) {
                for i in 0..u_nrows {
                    u[(i, j)] *= signs[j];
                }
            }
        }
    }

    Ok(())
}
437
438fn multiply_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
439    sparse: &M,
440    dense: &DMatrix<T>,
441    result: &mut DMatrix<T>,
442    transpose_sparse: bool,
443    column_means: &Option<DVector<T>>,
444) {
445    if column_means.is_none() {
446        multiply_matrix(sparse, dense, result, transpose_sparse);
447        return;
448    }
449
450    let means = column_means.as_ref().unwrap();
451    sparse.multiply_with_dense_centered(dense, result, transpose_sparse, means)
452}
453
454fn multiply_transposed_by_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
455    q: &DMatrix<T>,
456    sparse: &M,
457    result: &mut DMatrix<T>,
458    column_means: &Option<DVector<T>>,
459) {
460     if column_means.is_none() {
461        multiply_transposed_by_matrix(q, sparse, result);
462        return;
463    }
464
465    let means = column_means.as_ref().unwrap();
466    sparse.multiply_transposed_by_dense_centered(q, result, means);
467}
468
#[cfg(test)]
mod randomized_svd_tests {
    use super::*;
    use crate::randomized::{randomized_svd, PowerIterationNormalizer};
    use nalgebra_sparse::coo::CooMatrix;
    use nalgebra_sparse::CsrMatrix;
    use ndarray::Array2;
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use rayon::ThreadPoolBuilder;
    use std::sync::Once;

    static INIT: Once = Once::new();

    /// Builds the global rayon thread pool exactly once across all tests.
    fn setup_thread_pool() {
        INIT.call_once(|| {
            ThreadPoolBuilder::new()
                .num_threads(16)
                .build_global()
                .expect("Failed to build global thread pool");

            println!("Initialized thread pool with {} threads", 16);
        });
    }

    /// Creates a random `rows x cols` COO matrix with approximately the
    /// requested density (seeded, so deterministic across runs).
    fn create_sparse_matrix(
        rows: usize,
        cols: usize,
        density: f64,
    ) -> nalgebra_sparse::coo::CooMatrix<f64> {
        use std::collections::HashSet;

        let mut coo = nalgebra_sparse::coo::CooMatrix::new(rows, cols);

        let mut rng = StdRng::seed_from_u64(42);

        // Clamp to [1, rows*cols]: without the upper bound a density > 1 would
        // make the de-duplicating fill loop below spin forever.
        let nnz = ((rows as f64 * cols as f64 * density).round() as usize)
            .max(1)
            .min(rows * cols);

        let mut positions = HashSet::new();

        while positions.len() < nnz {
            let i = rng.gen_range(0..rows);
            let j = rng.gen_range(0..cols);

            if positions.insert((i, j)) {
                // Re-draw until the value is comfortably non-zero.
                let val = loop {
                    let v: f64 = rng.gen_range(-10.0..10.0);
                    if v.abs() > 1e-10 {
                        break v;
                    }
                };

                coo.push(i, j, val);
            }
        }

        let actual_density = coo.nnz() as f64 / (rows as f64 * cols as f64);
        println!("Created sparse matrix: {} x {}", rows, cols);
        println!("  - Requested density: {:.6}", density);
        println!("  - Actual density: {:.6}", actual_density);
        println!("  - Sparsity: {:.4}%", (1.0 - actual_density) * 100.0);
        println!("  - Non-zeros: {}", coo.nnz());

        coo
    }

    /// Randomized SVD should agree with the Lanczos reference on the dominant
    /// singular values within a loose relative tolerance.
    #[test]
    fn test_randomized_svd_accuracy() {
        setup_thread_pool();

        let coo = create_sparse_matrix(500, 40, 0.1);

        let csr = CsrMatrix::from(&coo);

        let std_svd = crate::lanczos::svd_dim_seed(&csr, 10, 42).unwrap();

        let rand_svd = randomized_svd(
            &csr,
            10,
            5,
            4,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            true,
        )
        .unwrap();

        assert_eq!(rand_svd.d, 10, "Expected rank of 10");

        // Loose tolerance: randomized SVD is approximate by construction.
        let rel_tol = 0.4;
        let compare_count = std::cmp::min(std_svd.d, rand_svd.d);
        println!("Standard SVD has {} dimensions", std_svd.d);
        println!("Randomized SVD has {} dimensions", rand_svd.d);

        for i in 0..compare_count {
            let rel_diff = (std_svd.s[i] - rand_svd.s[i]).abs() / std_svd.s[i];
            println!(
                "Singular value {}: standard={}, randomized={}, rel_diff={}",
                i, std_svd.s[i], rand_svd.s[i], rel_diff
            );
            assert!(
                rel_diff < rel_tol,
                "Dominant singular value {} differs too much: rel diff = {}, standard = {}, randomized = {}",
                i, rel_diff, std_svd.s[i], rand_svd.s[i]
            );
        }
    }

    /// Smoke test: mean centering must run cleanly and produce a different
    /// spectrum than the uncentered decomposition of a mean-shifted matrix.
    #[test]
    fn test_randomized_svd_with_mean_centering() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(30, 10);
        let mut rng = StdRng::seed_from_u64(123);

        // Construct a rank-3 matrix plus per-column offsets and small noise.
        let column_means: Vec<f64> = (0..10).map(|i| i as f64 * 2.0).collect();

        let mut u = vec![vec![0.0; 3]; 30]; // 3 factors
        let mut v = vec![vec![0.0; 3]; 10];

        for i in 0..30 {
            for j in 0..3 {
                u[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..10 {
            for j in 0..3 {
                v[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..30 {
            for j in 0..10 {
                let mut val = 0.0;
                for k in 0..3 {
                    val += u[i][k] * v[j][k];
                }
                val = val + column_means[j] + rng.gen_range(-0.1..0.1);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let svd_no_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        )
        .unwrap();

        let svd_with_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            true,
            Some(42),
            false,
        )
        .unwrap();

        println!("Singular values without centering: {:?}", svd_no_center.s);
        println!("Singular values with centering: {:?}", svd_with_center.s);
    }

    /// Shape and ordering invariants on a large sparse input.
    #[test]
    fn test_randomized_svd_large_sparse() {
        setup_thread_pool();

        let test_matrix = create_sparse_matrix(5000, 1000, 0.01);

        let csr = CsrMatrix::from(&test_matrix);

        let result = randomized_svd(
            &csr,
            20,
            10,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        );

        assert!(
            result.is_ok(),
            "Randomized SVD failed on large sparse matrix: {:?}",
            result.err().unwrap()
        );

        let svd = result.unwrap();
        assert_eq!(svd.d, 20, "Expected rank of 20");
        assert_eq!(svd.u.ncols(), 20, "Expected 20 left singular vectors");
        assert_eq!(svd.u.nrows(), 5000, "Expected 5000 rows in U");
        assert_eq!(svd.vt.nrows(), 20, "Expected 20 right singular vectors");
        assert_eq!(svd.vt.ncols(), 1000, "Expected 1000 columns in V transpose");

        // Singular values must be positive and sorted in descending order.
        for i in 1..svd.s.len() {
            assert!(svd.s[i] > 0.0, "Singular values should be positive");
            assert!(
                svd.s[i - 1] >= svd.s[i],
                "Singular values should be in descending order"
            );
        }
    }

    /// More power iterations should reduce the reconstruction error on a
    /// low-rank-plus-noise matrix.
    #[test]
    fn test_power_iteration_impact() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(100, 50);
        let mut rng = StdRng::seed_from_u64(987);

        let mut u = vec![vec![0.0; 10]; 100];
        let mut v = vec![vec![0.0; 10]; 50];

        // NOTE(review): unified on `gen_range` (rand 0.8 API) to match the other
        // tests — the previous `random_range` calls are the rand 0.9 spelling and
        // cannot coexist with `gen_range` in the same build. Confirm crate version.
        for i in 0..100 {
            for j in 0..10 {
                u[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..50 {
            for j in 0..10 {
                v[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..100 {
            for j in 0..50 {
                let mut val = 0.0;
                for k in 0..10 {
                    val += u[i][k] * v[j][k];
                }
                val += rng.gen_range(-0.01..0.01);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let powers = [0, 1, 3, 5];
        let mut errors = Vec::new();

        // Dense copy for computing Frobenius reconstruction error.
        let mut dense_mat = Array2::<f64>::zeros((100, 50));
        for (i, j, val) in csr.triplet_iter() {
            dense_mat[[i, j]] = *val;
        }
        let matrix_norm = dense_mat.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

        for &power in &powers {
            let svd = randomized_svd(
                &csr,
                10,
                5,
                power,
                PowerIterationNormalizer::QR,
                false,
                Some(42),
                false,
            )
            .unwrap();

            let recon = svd.recompose();
            let mut error = 0.0;

            for i in 0..100 {
                for j in 0..50 {
                    error += (dense_mat[[i, j]] - recon[[i, j]]).powi(2);
                }
            }

            error = error.sqrt() / matrix_norm;
            errors.push(error);

            println!("Power iterations: {}, Relative error: {}", power, error);
        }

        // At least one power-iterated run should beat the zero-iteration
        // baseline by a meaningful margin.
        let mut improved = false;
        for i in 1..errors.len() {
            if errors[i] < errors[0] * 0.9 {
                improved = true;
                break;
            }
        }

        assert!(
            improved,
            "Power iterations did not improve accuracy as expected"
        );
    }
}
776}