//! Randomized (approximate) truncated SVD for sparse matrices
//! (`single_svdlib/randomized/mod.rs`).

1use crate::error::SvdLibError;
2use crate::{Diagnostics, IntoNdarray2, SMat, SvdFloat, SvdRec};
3use nalgebra_sparse::na::{ComplexField, DMatrix, DVector, RealField};
4use ndarray::Array1;
5use rand::prelude::{Distribution, StdRng};
6use rand::SeedableRng;
7use rand_distr::Normal;
8use rayon::iter::ParallelIterator;
9use rayon::prelude::IntoParallelIterator;
10use std::ops::Mul;
11use std::time::Instant;
12
/// Stabilization strategy applied to the subspace basis between power
/// iterations (trades numerical robustness against cost).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PowerIterationNormalizer {
    /// Full QR re-orthonormalization each half-step — most stable, most expensive.
    QR,
    /// Per-column L2 normalization (see `normalize_columns`) — cheaper,
    /// less stable than QR.
    LU,
    /// No normalization between iterations — fastest, least stable.
    None,
}
19
// Below BOTH of these dimensions `normalize_columns` runs serially; at or
// above either threshold the column norms are computed in parallel via rayon.
const PARALLEL_THRESHOLD_ROWS: usize = 5000;
const PARALLEL_THRESHOLD_COLS: usize = 1000;
22
/// Computes an approximate truncated SVD of the sparse matrix `m` using a
/// randomized range finder (Halko–Martinsson–Tropp style): sketch the range
/// of `m` with a Gaussian test matrix, optionally sharpen the subspace with
/// power iterations, orthonormalize the basis `Q`, then take a dense SVD of
/// the small projected matrix `B = Qᵀ·M`.
///
/// # Arguments
/// * `m` - sparse input matrix.
/// * `target_rank` - number of singular triplets requested.
/// * `n_oversamples` - extra sketch columns beyond `target_rank`; improves
///   how well the sketch captures the dominant subspace.
/// * `n_power_iters` - number of power iterations to run.
/// * `power_iteration_normalizer` - stabilization between power-iteration steps.
/// * `mean_center` - if true, column means are subtracted implicitly during
///   every multiplication, emulating a centered matrix without densifying `m`.
/// * `seed` - RNG seed for the Gaussian sketch; `None` falls back to a fixed
///   seed of 0, so the result is deterministic either way.
/// * `verbose` - print progress/timing to stdout.
///
/// # Errors
/// Fails if the dense SVD of the projected matrix does not produce `U` or `Vᵀ`.
pub fn randomized_svd<T, M>(
    m: &M,
    target_rank: usize,
    n_oversamples: usize,
    n_power_iters: usize,
    power_iteration_normalizer: PowerIterationNormalizer,
    mean_center: bool,
    seed: Option<u64>,
    verbose: bool,
) -> anyhow::Result<SvdRec<T>>
where
    T: SvdFloat + RealField,
    M: SMat<T> + std::marker::Sync,
    T: ComplexField,
{
    let start = Instant::now();
    let m_rows = m.nrows();
    let m_cols = m.ncols();

    // Clamp the rank to what the matrix can support; `l` is the oversampled
    // sketch width used for every intermediate subspace matrix.
    let rank = target_rank.min(m_rows.min(m_cols));
    let l = rank + n_oversamples;

    // Column means are materialized only when centering is requested; they are
    // applied inside the `*_centered` multiplications so `m` stays sparse.
    let column_means: Option<DVector<T>> = if mean_center {
        if verbose {
            println!("SVD | Randomized | Computing column means....");
        }
        Some(DVector::from(m.compute_column_means()))
    } else {
        None
    };
    if verbose && mean_center {
        println!(
            "SVD | Randomized | Computed column means, took: {:?} of total running time",
            start.elapsed()
        );
    }

    // Gaussian test matrix Ω (m_cols × l); seeded, so the sketch is reproducible.
    let omega = generate_random_matrix(m_cols, l, seed);

    // Range sketch: y = M·Ω (m_rows × l).
    let mut y = DMatrix::<T>::zeros(m_rows, l);
    if verbose {
        println!("SVD | Randomized | Multiplying m with omega matrix....");
    }
    multiply_matrix_centered(m, &omega, &mut y, false, &column_means);
    if verbose {
        println!(
            "SVD | Randomized | Multiplication done, took: {:?} of total running time",
            start.elapsed()
        );
    }
    if verbose {
        println!("SVD | Randomized | Starting power iterations....");
    }
    // Power iterations: alternate z = Mᵀ·y and y = M·z, normalizing in
    // between. This sharpens the spectrum so the sketch better isolates the
    // dominant singular subspace.
    if n_power_iters > 0 {
        let mut z = DMatrix::<T>::zeros(m_cols, l);

        for i in 0..n_power_iters {
            if verbose {
                println!(
                    "SVD | Randomized | Running power-iteration: {:?}, current time: {:?}",
                    i,
                    start.elapsed()
                );
            }
            // Forward half-step: z = Mᵀ·y (the `true` flag transposes `m`).
            multiply_matrix_centered(m, &y, &mut z, true, &column_means);
            if verbose {
                println!(
                    "SVD | Randomized | Forward Multiplication {:?}",
                    start.elapsed()
                );
            }
            match power_iteration_normalizer {
                PowerIterationNormalizer::QR => {
                    let qr = z.qr();
                    z = qr.q();
                    // After QR normalization, z has fewer columns, so we need to resize y
                    y = DMatrix::<T>::zeros(m_rows, z.ncols());
                }
                PowerIterationNormalizer::LU => {
                    normalize_columns(&mut z);
                }
                PowerIterationNormalizer::None => {}
            }
            if verbose {
                println!(
                    "SVD | Randomized | Power Iteration Normalization Forward-Step {:?}",
                    start.elapsed()
                );
            }

            // Backward half-step: y = M·z.
            multiply_matrix_centered(m, &z, &mut y, false, &column_means);
            if verbose {
                println!(
                    "SVD | Randomized | Backward Multiplication {:?}",
                    start.elapsed()
                );
            }
            match power_iteration_normalizer {
                PowerIterationNormalizer::QR => {
                    let qr = y.qr();
                    y = qr.q();
                }
                PowerIterationNormalizer::LU => normalize_columns(&mut y),
                PowerIterationNormalizer::None => {}
            }
            if verbose {
                println!(
                    "SVD | Randomized | Power Iteration Normalization Backward-Step {:?}",
                    start.elapsed()
                );
            }
        }
    }
    if verbose {
        println!(
            "SVD | Randomized | Running QR-Normalization after Power-Iterations {:?}",
            start.elapsed()
        );
    }
    // Final orthonormal basis Q for the (approximate) range of M.
    let qr = y.qr();
    let y = qr.q();
    if verbose {
        println!(
            "SVD | Randomized | Finished QR-Normalization after Power-Iterations {:?}",
            start.elapsed()
        );
    }

    // Project: B = Qᵀ·M (small: y.ncols() × m_cols), then dense SVD of B.
    let mut b = DMatrix::<T>::zeros(y.ncols(), m_cols);
    multiply_transposed_by_matrix_centered(&y, m, &mut b, &column_means);
    if verbose {
        println!(
            "SVD | Randomized | Transposed Matrix Multiplication {:?}",
            start.elapsed()
        );
    }
    let svd = b.svd(true, true);
    if verbose {
        println!(
            "SVD | Randomized | Running Singular Value Decomposition, took {:?}",
            start.elapsed()
        );
    }
    let u_b = svd
        .u
        .ok_or_else(|| SvdLibError::Las2Error("SVD U computation failed".to_string()))?;
    let singular_values = svd.singular_values;
    let vt = svd
        .v_t
        .ok_or_else(|| SvdLibError::Las2Error("SVD V_t computation failed".to_string()))?;

    // Lift the small left factor back to the original row space: U = Q·U_b.
    let u = y.mul(&u_b);
    let actual_rank = target_rank.min(singular_values.len());

    // Truncate all factors to the attained rank before conversion to ndarray.
    let u_subset = u.columns(0, actual_rank);
    let s = convert_singular_values(
        <DVector<T>>::from(singular_values.rows(0, actual_rank)),
        actual_rank,
    );
    let vt_subset = vt.rows(0, actual_rank).into_owned();
    let u = u_subset.into_owned().into_ndarray2();
    let vt = vt_subset.into_ndarray2();
    Ok(SvdRec {
        d: actual_rank,
        u,
        s,
        vt,
        diagnostics: create_diagnostics(
            m,
            actual_rank,
            target_rank,
            n_power_iters,
            // NOTE(review): truncates a 64-bit seed to 32 bits; diagnostics
            // only, does not affect the computation.
            seed.unwrap_or(0) as u32,
        ),
    })
}
199
200fn convert_singular_values<T: SvdFloat + ComplexField>(
201    values: DVector<T::RealField>,
202    size: usize,
203) -> Array1<T> {
204    let mut array = Array1::zeros(size);
205
206    for i in 0..size {
207        array[i] = T::from_real(values[i].clone());
208    }
209
210    array
211}
212
213fn create_diagnostics<T, M: SMat<T>>(
214    a: &M,
215    d: usize,
216    target_rank: usize,
217    power_iterations: usize,
218    seed: u32,
219) -> Diagnostics<T>
220where
221    T: SvdFloat,
222{
223    Diagnostics {
224        non_zero: a.nnz(),
225        dimensions: target_rank,
226        iterations: power_iterations,
227        transposed: false,
228        lanczos_steps: 0, // we dont do that
229        ritz_values_stabilized: d,
230        significant_values: d,
231        singular_values: d,
232        end_interval: [T::from(-1e-30).unwrap(), T::from(1e-30).unwrap()],
233        kappa: T::from(1e-6).unwrap(),
234        random_seed: seed,
235    }
236}
237
238fn normalize_columns<T: SvdFloat + RealField + Send + Sync>(matrix: &mut DMatrix<T>) {
239    let rows = matrix.nrows();
240    let cols = matrix.ncols();
241
242    if rows < PARALLEL_THRESHOLD_ROWS && cols < PARALLEL_THRESHOLD_COLS {
243        for j in 0..cols {
244            let mut norm = T::zero();
245
246            // Calculate column norm
247            for i in 0..rows {
248                norm += ComplexField::powi(matrix[(i, j)], 2);
249            }
250            norm = ComplexField::sqrt(norm);
251
252            if norm > T::from_f64(1e-10).unwrap() {
253                let scale = T::one() / norm;
254                for i in 0..rows {
255                    matrix[(i, j)] *= scale;
256                }
257            }
258        }
259        return;
260    }
261
262    let norms: Vec<T> = (0..cols)
263        .into_par_iter()
264        .map(|j| {
265            let mut norm = T::zero();
266            for i in 0..rows {
267                let val = unsafe { *matrix.get_unchecked((i, j)) };
268                norm += ComplexField::powi(val, 2);
269            }
270            ComplexField::sqrt(norm)
271        })
272        .collect();
273
274    let scales: Vec<(usize, T)> = norms
275        .into_iter()
276        .enumerate()
277        .filter_map(|(j, norm)| {
278            if norm > T::from_f64(1e-10).unwrap() {
279                Some((j, T::one() / norm))
280            } else {
281                None // Skip columns with too small norms
282            }
283        })
284        .collect();
285
286    scales.iter().for_each(|(j, scale)| {
287        for i in 0..rows {
288            let value = matrix.get_mut((i, *j)).unwrap();
289            *value = value.clone() * scale.clone();
290        }
291    });
292}
293
294// ----------------------------------------
295// Utils Functions
296// ----------------------------------------
297
298fn generate_random_matrix<T: SvdFloat + RealField>(
299    rows: usize,
300    cols: usize,
301    seed: Option<u64>,
302) -> DMatrix<T> {
303    let mut rng = match seed {
304        Some(s) => StdRng::seed_from_u64(s),
305        None => StdRng::seed_from_u64(0),
306    };
307
308    let normal = Normal::new(0.0, 1.0).unwrap();
309    DMatrix::from_fn(rows, cols, |_, _| {
310        T::from_f64(normal.sample(&mut rng)).unwrap()
311    })
312}
313
/// Computes `result = sparse · dense` (or `sparseᵀ · dense` when
/// `transpose_sparse` is set), delegating to the `SMat` implementation.
fn multiply_matrix<T: SvdFloat, M: SMat<T>>(
    sparse: &M,
    dense: &DMatrix<T>,
    result: &mut DMatrix<T>,
    transpose_sparse: bool,
) {
    sparse.multiply_with_dense(dense, result, transpose_sparse)
}
322
/// Computes `result = qᵀ · sparse`, delegating to the `SMat` implementation.
fn multiply_transposed_by_matrix<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
    q: &DMatrix<T>,
    sparse: &M,
    result: &mut DMatrix<T>,
) {
    sparse.multiply_transposed_by_dense(q, result);
}
330
/// Resolves the sign ambiguity of an SVD in place so results are deterministic.
///
/// Each singular vector is flipped so that its largest-magnitude entry is
/// positive. With `u_based_decision == true` the sign is chosen per *column*
/// of `u` and mirrored onto the matching *row* of `v`; otherwise it is chosen
/// per row of `v` and mirrored onto the matching column of `u`.
///
/// NOTE(review): the row-wise handling of `v` suggests it is expected to be
/// Vᵀ (rows are right singular vectors) — confirm against callers.
///
/// # Errors
/// Returns `SvdLibError::Las2Error` when both factors are `None`, or when the
/// factor the decision is based on is `None`.
pub fn svd_flip<T: SvdFloat + 'static>(
    u: Option<&mut DMatrix<T>>,
    v: Option<&mut DMatrix<T>>,
    u_based_decision: bool,
) -> Result<(), SvdLibError> {
    if u.is_none() && v.is_none() {
        return Err(SvdLibError::Las2Error(
            "Both u and v cannot be None".to_string(),
        ));
    }

    if u_based_decision {
        if u.is_none() {
            return Err(SvdLibError::Las2Error(
                "u cannot be None when u_based_decision is true".to_string(),
            ));
        }

        let u = u.unwrap();
        let ncols = u.ncols();
        let nrows = u.nrows();

        // One sign per column of u; defaults to +1.
        let mut signs = DVector::from_element(ncols, T::one());

        for j in 0..ncols {
            // Locate the entry with the largest absolute value in column j.
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for i in 0..nrows {
                let abs_val = num_traits::Float::abs(u[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = i;
                }
            }

            // Flip the column if its dominant entry is negative.
            if u[(max_idx, j)] < T::zero() {
                signs[j] = -T::one();
            }
        }

        // Apply the chosen signs to the columns of u.
        for j in 0..ncols {
            for i in 0..nrows {
                u[(i, j)] *= signs[j];
            }
        }

        // Mirror the same signs onto the rows of v so u·s·v is unchanged.
        if let Some(v) = v {
            let v_nrows = v.nrows();
            let v_ncols = v.ncols();

            for i in 0..v_nrows.min(signs.len()) {
                for j in 0..v_ncols {
                    v[(i, j)] *= signs[i];
                }
            }
        }
    } else {
        if v.is_none() {
            return Err(SvdLibError::Las2Error(
                "v cannot be None when u_based_decision is false".to_string(),
            ));
        }

        let v = v.unwrap();
        let nrows = v.nrows();
        let ncols = v.ncols();

        // One sign per row of v; defaults to +1.
        let mut signs = DVector::from_element(nrows, T::one());

        for i in 0..nrows {
            // Locate the entry with the largest absolute value in row i.
            let mut max_abs = T::zero();
            let mut max_idx = 0;

            for j in 0..ncols {
                let abs_val = num_traits::Float::abs(v[(i, j)]);
                if abs_val > max_abs {
                    max_abs = abs_val;
                    max_idx = j;
                }
            }

            // Flip the row if its dominant entry is negative.
            if v[(i, max_idx)] < T::zero() {
                signs[i] = -T::one();
            }
        }

        // Apply the chosen signs to the rows of v.
        for i in 0..nrows {
            for j in 0..ncols {
                v[(i, j)] *= signs[i];
            }
        }

        // Mirror the same signs onto the columns of u so u·s·v is unchanged.
        if let Some(u) = u {
            let u_nrows = u.nrows();
            let u_ncols = u.ncols();

            for j in 0..u_ncols.min(signs.len()) {
                for i in 0..u_nrows {
                    u[(i, j)] *= signs[j];
                }
            }
        }
    }

    Ok(())
}
438
439fn multiply_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
440    sparse: &M,
441    dense: &DMatrix<T>,
442    result: &mut DMatrix<T>,
443    transpose_sparse: bool,
444    column_means: &Option<DVector<T>>,
445) {
446    if column_means.is_none() {
447        multiply_matrix(sparse, dense, result, transpose_sparse);
448        return;
449    }
450
451    let means = column_means.as_ref().unwrap();
452    sparse.multiply_with_dense_centered(dense, result, transpose_sparse, means)
453}
454
455fn multiply_transposed_by_matrix_centered<T: SvdFloat, M: SMat<T> + std::marker::Sync>(
456    q: &DMatrix<T>,
457    sparse: &M,
458    result: &mut DMatrix<T>,
459    column_means: &Option<DVector<T>>,
460) {
461     if column_means.is_none() {
462        multiply_transposed_by_matrix(q, sparse, result);
463        return;
464    }
465
466    let means = column_means.as_ref().unwrap();
467    sparse.multiply_transposed_by_dense_centered(q, result, means);
468}
469
#[cfg(test)]
mod randomized_svd_tests {
    use super::*;
    use crate::randomized::{randomized_svd, PowerIterationNormalizer};
    use nalgebra_sparse::coo::CooMatrix;
    use nalgebra_sparse::CsrMatrix;
    use ndarray::Array2;
    use rand::rngs::StdRng;
    use rand::{Rng, SeedableRng};
    use rayon::ThreadPoolBuilder;
    use std::sync::Once;

    static INIT: Once = Once::new();

    /// Installs the global rayon thread pool exactly once across all tests.
    fn setup_thread_pool() {
        INIT.call_once(|| {
            ThreadPoolBuilder::new()
                .num_threads(16)
                .build_global()
                .expect("Failed to build global thread pool");

            println!("Initialized thread pool with {} threads", 16);
        });
    }

    /// Builds a reproducible random sparse matrix of roughly the requested
    /// density; every stored entry is guaranteed non-zero.
    fn create_sparse_matrix(
        rows: usize,
        cols: usize,
        density: f64,
    ) -> nalgebra_sparse::coo::CooMatrix<f64> {
        use std::collections::HashSet;

        let mut coo = nalgebra_sparse::coo::CooMatrix::new(rows, cols);
        let mut rng = StdRng::seed_from_u64(42);

        // At least one non-zero even for degenerate densities.
        let nnz = ((rows as f64 * cols as f64 * density).round() as usize).max(1);

        let mut positions = HashSet::new();
        while positions.len() < nnz {
            let i = rng.gen_range(0..rows);
            let j = rng.gen_range(0..cols);

            if positions.insert((i, j)) {
                // Reject values too close to zero so the entry is a genuine
                // non-zero.
                let val = loop {
                    let v: f64 = rng.gen_range(-10.0..10.0);
                    if v.abs() > 1e-10 {
                        break v;
                    }
                };

                coo.push(i, j, val);
            }
        }

        let actual_density = coo.nnz() as f64 / (rows as f64 * cols as f64);
        println!("Created sparse matrix: {} x {}", rows, cols);
        println!("  - Requested density: {:.6}", density);
        println!("  - Actual density: {:.6}", actual_density);
        println!("  - Sparsity: {:.4}%", (1.0 - actual_density) * 100.0);
        println!("  - Non-zeros: {}", coo.nnz());

        coo
    }

    /// Randomized SVD should roughly match Lanczos singular values.
    #[test]
    fn test_randomized_svd_accuracy() {
        setup_thread_pool();

        let coo = create_sparse_matrix(500, 40, 0.1);
        let csr = CsrMatrix::from(&coo);

        let std_svd = crate::lanczos::svd_dim_seed(&csr, 10, 42).unwrap();

        let rand_svd = randomized_svd(
            &csr,
            10,
            5,
            4,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            true,
        )
        .unwrap();

        assert_eq!(rand_svd.d, 10, "Expected rank of 10");

        // Randomized SVD is approximate; allow a generous relative tolerance.
        let rel_tol = 0.4;
        let compare_count = std::cmp::min(std_svd.d, rand_svd.d);
        println!("Standard SVD has {} dimensions", std_svd.d);
        println!("Randomized SVD has {} dimensions", rand_svd.d);

        for i in 0..compare_count {
            let rel_diff = (std_svd.s[i] - rand_svd.s[i]).abs() / std_svd.s[i];
            println!(
                "Singular value {}: standard={}, randomized={}, rel_diff={}",
                i, std_svd.s[i], rand_svd.s[i], rel_diff
            );
            assert!(
                rel_diff < rel_tol,
                "Dominant singular value {} differs too much: rel diff = {}, standard = {}, randomized = {}",
                i, rel_diff, std_svd.s[i], rand_svd.s[i]
            );
        }
    }

    // Test with mean centering
    #[test]
    fn test_randomized_svd_with_mean_centering() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(30, 10);
        let mut rng = StdRng::seed_from_u64(123);

        // Low-rank (rank-3) structure plus per-column offsets and small noise.
        let column_means: Vec<f64> = (0..10).map(|i| i as f64 * 2.0).collect();

        let mut u = vec![vec![0.0; 3]; 30]; // 3 factors
        let mut v = vec![vec![0.0; 3]; 10];

        for i in 0..30 {
            for j in 0..3 {
                u[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..10 {
            for j in 0..3 {
                v[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..30 {
            for j in 0..10 {
                let mut val = 0.0;
                for k in 0..3 {
                    val += u[i][k] * v[j][k];
                }
                val = val + column_means[j] + rng.gen_range(-0.1..0.1);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let svd_no_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        )
        .unwrap();

        let svd_with_center = randomized_svd(
            &csr,
            3,
            3,
            2,
            PowerIterationNormalizer::QR,
            true,
            Some(42),
            false,
        )
        .unwrap();

        println!("Singular values without centering: {:?}", svd_no_center.s);
        println!("Singular values with centering: {:?}", svd_with_center.s);
    }

    #[test]
    fn test_randomized_svd_large_sparse() {
        setup_thread_pool();

        let test_matrix = create_sparse_matrix(5000, 1000, 0.01);
        let csr = CsrMatrix::from(&test_matrix);

        let result = randomized_svd(
            &csr,
            20,
            10,
            2,
            PowerIterationNormalizer::QR,
            false,
            Some(42),
            false,
        );

        assert!(
            result.is_ok(),
            "Randomized SVD failed on large sparse matrix: {:?}",
            result.err().unwrap()
        );

        let svd = result.unwrap();
        assert_eq!(svd.d, 20, "Expected rank of 20");
        assert_eq!(svd.u.ncols(), 20, "Expected 20 left singular vectors");
        assert_eq!(svd.u.nrows(), 5000, "Expected 5000 columns in U transpose");
        assert_eq!(svd.vt.nrows(), 20, "Expected 20 right singular vectors");
        assert_eq!(svd.vt.ncols(), 1000, "Expected 1000 columns in V transpose");

        // Singular values must be positive and sorted in descending order.
        for i in 1..svd.s.len() {
            assert!(svd.s[i] > 0.0, "Singular values should be positive");
            assert!(
                svd.s[i - 1] >= svd.s[i],
                "Singular values should be in descending order"
            );
        }
    }

    // Test with different power iteration settings
    #[test]
    fn test_power_iteration_impact() {
        setup_thread_pool();

        let mut coo = CooMatrix::<f64>::new(100, 50);
        let mut rng = StdRng::seed_from_u64(987);

        // Rank-10 structure plus small noise.
        let mut u = vec![vec![0.0; 10]; 100];
        let mut v = vec![vec![0.0; 10]; 50];

        for i in 0..100 {
            for j in 0..10 {
                u[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..50 {
            for j in 0..10 {
                v[i][j] = rng.gen_range(-1.0..1.0);
            }
        }

        for i in 0..100 {
            for j in 0..50 {
                let mut val = 0.0;
                for k in 0..10 {
                    val += u[i][k] * v[j][k];
                }
                val += rng.gen_range(-0.01..0.01);
                coo.push(i, j, val);
            }
        }

        let csr = CsrMatrix::from(&coo);

        let powers = [0, 1, 3, 5];
        let mut errors = Vec::new();

        // Dense copy for computing reconstruction error.
        let mut dense_mat = Array2::<f64>::zeros((100, 50));
        for (i, j, val) in csr.triplet_iter() {
            dense_mat[[i, j]] = *val;
        }
        let matrix_norm = dense_mat.iter().map(|x| x.powi(2)).sum::<f64>().sqrt();

        for &power in &powers {
            let svd = randomized_svd(
                &csr,
                10,
                5,
                power,
                PowerIterationNormalizer::QR,
                false,
                Some(42),
                false,
            )
            .unwrap();

            let recon = svd.recompose();
            let mut error = 0.0;

            for i in 0..100 {
                for j in 0..50 {
                    error += (dense_mat[[i, j]] - recon[[i, j]]).powi(2);
                }
            }

            error = error.sqrt() / matrix_norm;
            errors.push(error);

            println!("Power iterations: {}, Relative error: {}", power, error);
        }

        // At least one setting with power iterations should beat the baseline
        // (power = 0) by a meaningful margin.
        let mut improved = false;
        for i in 1..errors.len() {
            if errors[i] < errors[0] * 0.9 {
                improved = true;
                break;
            }
        }

        assert!(
            improved,
            "Power iterations did not improve accuracy as expected"
        );
    }
}