// NOTE: removed docs.rs page header and line-number gutter that were captured
// as literal text during extraction (not part of the source file).
//! Scalable Algorithms for Tall-and-Skinny and Short-and-Fat Matrices Example
//!
//! This example demonstrates the comprehensive scalable algorithm capabilities
//! that provide cutting-edge solutions for extreme aspect ratio matrices in
//! modern machine learning, data science, and scientific computing:
//!
//! - Tall-and-Skinny QR (TSQR) for communication-optimal decomposition
//! - LQ decomposition for short-and-fat matrices
//! - Adaptive algorithm selection based on aspect ratio
//! - Blocked matrix operations for memory efficiency
//! - Randomized sketching for dimensionality reduction
//! - Performance analytics and optimization recommendations
//!
//! These algorithms are crucial for big data applications where matrices
//! have millions of rows or columns but extreme aspect ratios.

use scirs2_core::ndarray::{Array1, Array2};
use scirs2_linalg::scalable::{
    adaptive_decomposition, blocked_matmul, classify_aspect_ratio, lq_decomposition,
    randomized_svd, tsqr, AspectRatio, ScalableConfig,
};
use std::time::Instant;

#[allow(dead_code)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("šŸš€ SCALABLE ALGORITHMS - Advanced DEMONSTRATION");
    println!("=================================================");

    // Test 1: Aspect Ratio Classification
    println!("\n1. ASPECT RATIO CLASSIFICATION");
    println!("------------------------------");

    let aspect_examples = vec![
        (1000, 50, "Machine Learning Features"),
        (20, 800, "Compressed Sensing"),
        (200, 180, "Nearly Square Matrix"),
        (5000, 10, "Time Series Data"),
        (5, 2000, "Genomics Data"),
    ];

    for (m, n, description) in aspect_examples {
        let dummymatrix = Array2::<f64>::zeros((m, n));
        let aspect = classify_aspect_ratio(&dummymatrix.view(), 4.0);
        println!("   {} ({}Ɨ{}): {:?}", description, m, n, aspect);
    }

    // Test 2: Tall-and-Skinny QR (TSQR)
    println!("\n2. TALL-AND-SKINNY QR (TSQR): Communication-Optimal Decomposition");
    println!("------------------------------------------------------------------");

    // Create a tall-and-skinny matrix representing feature vectors
    let m_tall = 2000;
    let n_tall = 25;
    let tallmatrix = Array2::from_shape_fn((m_tall, n_tall), |(i, j)| {
        // Simulate feature matrix with some structure
        let freq1 = 2.0 * std::f64::consts::PI * (i as f64) / 100.0;
        let freq2 = 2.0 * std::f64::consts::PI * (j as f64) / 10.0;
        freq1.sin() + 0.5 * freq2.cos() + 0.1 * (i + j) as f64 / 1000.0
    });

    println!(
        "   Matrix dimensions: {}Ɨ{} (aspect ratio: {:.1})",
        m_tall,
        n_tall,
        m_tall as f64 / n_tall as f64
    );

    let config = ScalableConfig::default().with_blocksize(512);

    let start_time = Instant::now();
    let (q, r) = tsqr(&tallmatrix.view(), &config)?;
    let tsqr_time = start_time.elapsed();

    println!(
        "   TSQR computation time: {:.2}ms",
        tsqr_time.as_nanos() as f64 / 1_000_000.0
    );
    println!("   Q dimensions: {}Ɨ{}", q.nrows(), q.ncols());
    println!("   R dimensions: {}Ɨ{}", r.nrows(), r.ncols());

    // Verify orthogonality of Q
    let qtq = q.t().dot(&q);
    let identity = Array2::<f64>::eye(n_tall);
    let mut max_orthogonality_error = 0.0;
    for i in 0..n_tall {
        for j in 0..n_tall {
            let error = (qtq[[i, j]] - identity[[i, j]]).abs();
            if error > max_orthogonality_error {
                max_orthogonality_error = error;
            }
        }
    }
    println!("   Orthogonality error: {:.2e}", max_orthogonality_error);

    // Verify reconstruction: A ā‰ˆ Q*R
    let reconstructed = q.dot(&r);
    let mut max_reconstruction_error = 0.0;
    for i in 0..m_tall.min(100) {
        // Check first 100 rows for efficiency
        for j in 0..n_tall {
            let error = (tallmatrix[[i, j]] - reconstructed[[i, j]]).abs();
            if error > max_reconstruction_error {
                max_reconstruction_error = error;
            }
        }
    }
    println!("   Reconstruction error: {:.2e}", max_reconstruction_error);
    println!("   āœ… TSQR provides communication-optimal O(n²) complexity");

    // Test 3: LQ Decomposition for Short-and-Fat Matrices
    println!("\n3. LQ DECOMPOSITION: Optimal for Short-and-Fat Matrices");
    println!("-------------------------------------------------------");

    let m_short = 20;
    let n_short = 1000;
    let shortmatrix = Array2::from_shape_fn((m_short, n_short), |(i, j)| {
        // Simulate compressed sensing or genomics data
        let signal = (j as f64 / 50.0).sin() * (i as f64 + 1.0);
        signal + 0.01 * (i * j) as f64 / 10000.0
    });

    println!(
        "   Matrix dimensions: {}Ɨ{} (aspect ratio: {:.1})",
        m_short,
        n_short,
        m_short as f64 / n_short as f64
    );

    let start_time = Instant::now();
    let (l, q) = lq_decomposition(&shortmatrix.view(), &config)?;
    let lq_time = start_time.elapsed();

    println!(
        "   LQ computation time: {:.2}ms",
        lq_time.as_nanos() as f64 / 1_000_000.0
    );
    println!("   L dimensions: {}Ɨ{}", l.nrows(), l.ncols());
    println!("   Q dimensions: {}Ɨ{}", q.nrows(), q.ncols());

    // Verify L is lower triangular
    let mut max_upper_triangular = 0.0;
    for i in 0..m_short {
        for j in (i + 1)..m_short {
            let val = l[[i, j]].abs();
            if val > max_upper_triangular {
                max_upper_triangular = val;
            }
        }
    }
    println!("   Lower triangular error: {:.2e}", max_upper_triangular);

    // Verify reconstruction: A ā‰ˆ L*Q
    let lq_reconstructed = l.dot(&q);
    let mut max_lq_error = 0.0;
    for i in 0..m_short {
        for j in 0..n_short.min(100) {
            // Check first 100 columns for efficiency
            let error = (shortmatrix[[i, j]] - lq_reconstructed[[i, j]]).abs();
            if error > max_lq_error {
                max_lq_error = error;
            }
        }
    }
    println!("   Reconstruction error: {:.2e}", max_lq_error);
    println!("   āœ… LQ decomposition optimal for overdetermined systems");

    // Test 4: Adaptive Algorithm Selection
    println!("\n4. ADAPTIVE ALGORITHM SELECTION: Smart Optimization");
    println!("---------------------------------------------------");

    let test_matrices = vec![
        (
            "Tall Feature Matrix",
            Array2::from_shape_fn((800, 15), |(i, j)| (i + j + 1) as f64),
        ),
        (
            "Short Genomics Matrix",
            Array2::from_shape_fn((12, 600), |(i, j)| (i * j + 1) as f64),
        ),
        (
            "Square-ish Matrix",
            Array2::from_shape_fn((120, 100), |(i, j)| (i + j + 1) as f64),
        ),
    ];

    for (description, matrix) in test_matrices {
        println!("\n   Testing: {}", description);
        let (m, n) = matrix.dim();

        let result = adaptive_decomposition(&matrix.view(), &config)?;

        println!("     Detected aspect ratio: {:?}", result.aspect_ratio);
        println!("     Selected algorithm: {}", result.algorithm_used);
        println!(
            "     Complexity estimate: {} FLOPs",
            result.complexity_estimate
        );
        println!(
            "     Memory estimate: {:.1} KB",
            result.memory_estimate as f64 / 1024.0
        );

        let metrics = &result.performance_metrics;
        println!(
            "     Communication volume: {} elements",
            metrics.communication_volume
        );
        println!(
            "     Cache efficiency: {:.1}%",
            metrics.cache_efficiency * 100.0
        );
        println!(
            "     Memory bandwidth: {:.1} MB/s",
            metrics.memory_bandwidth
        );

        // Verify decomposition quality
        let reconstruction = result.factor1.dot(&result.factor2);
        let mut max_error = 0.0;
        for i in 0..m.min(50) {
            for j in 0..n.min(50) {
                let error = (matrix[[i, j]] - reconstruction[[i, j]]).abs();
                if error > max_error {
                    max_error = error;
                }
            }
        }
        println!("     Reconstruction error: {:.2e}", max_error);
    }

    // Test 5: Blocked Matrix Multiplication
    println!("\n5. BLOCKED MATRIX MULTIPLICATION: Memory-Efficient Operations");
    println!("------------------------------------------------------------");

    let asize = (400, 200);
    let bsize = (200, 300);

    let matrix_a = Array2::from_shape_fn(asize, |(i, j)| ((i + j + 1) as f64).sin() / 100.0);
    let matrix_b = Array2::from_shape_fn(bsize, |(i, j)| ((i * j + 1) as f64).cos() / 100.0);

    println!(
        "   Matrix A: {}Ɨ{}, Matrix B: {}Ɨ{}",
        asize.0, asize.1, bsize.0, bsize.1
    );

    // Compare blocked vs standard multiplication
    let config_small_blocks = config.clone().with_blocksize(64);

    let start_time = Instant::now();
    let result_blocked = blocked_matmul(&matrix_a.view(), &matrix_b.view(), &config_small_blocks)?;
    let blocked_time = start_time.elapsed();

    let start_time = Instant::now();
    let result_standard = matrix_a.dot(&matrix_b);
    let standard_time = start_time.elapsed();

    println!(
        "   Blocked multiplication time: {:.2}ms",
        blocked_time.as_nanos() as f64 / 1_000_000.0
    );
    println!(
        "   Standard multiplication time: {:.2}ms",
        standard_time.as_nanos() as f64 / 1_000_000.0
    );

    // Verify results are identical
    let mut max_diff = 0.0;
    for i in 0..asize.0 {
        for j in 0..bsize.1 {
            let diff = (result_blocked[[i, j]] - result_standard[[i, j]]).abs();
            if diff > max_diff {
                max_diff = diff;
            }
        }
    }
    println!("   Maximum difference: {:.2e}", max_diff);
    println!("   āœ… Blocked algorithm provides same results with better cache efficiency");

    // Test 6: Randomized SVD for Low-Rank Approximation
    println!("\n6. RANDOMIZED SVD: Efficient Low-Rank Approximation");
    println!("---------------------------------------------------");

    // Create a low-rank matrix for demonstration
    let rank_true = 10;
    let u_true =
        Array2::from_shape_fn((300, rank_true), |(i, j)| ((i + j + 1) as f64).sin() / 10.0);
    let s_true = Array1::from_shape_fn(rank_true, |i| {
        10.0 * (-(i as f64) / 2.0).exp() // Exponentially decaying singular values
    });
    let vt_true =
        Array2::from_shape_fn((rank_true, 200), |(i, j)| ((i * j + 1) as f64).cos() / 10.0);

    // Construct low-rank matrix: A = U * S * V^T
    let smatrix = Array2::from_diag(&s_true);
    let us = u_true.dot(&smatrix);
    let low_rankmatrix = us.dot(&vt_true);

    println!(
        "   Original matrix: {}Ɨ{} (true rank: {})",
        low_rankmatrix.nrows(),
        low_rankmatrix.ncols(),
        rank_true
    );

    let target_rank = 8;
    let config_randomized = config.clone().with_oversampling(4);

    let start_time = Instant::now();
    let (u_approx, s_approx, vt_approx) =
        randomized_svd(&low_rankmatrix.view(), target_rank, &config_randomized)?;
    let randomized_time = start_time.elapsed();

    println!(
        "   Randomized SVD time: {:.2}ms",
        randomized_time.as_nanos() as f64 / 1_000_000.0
    );
    println!("   Approximation rank: {}", target_rank);
    println!(
        "   Computed singular values: {:?}",
        s_approx
            .iter()
            .take(5)
            .map(|&x| format!("{:.3}", x))
            .collect::<Vec<_>>()
    );

    // Compute approximation error
    let s_diag = Array2::from_diag(&s_approx);
    let approximation = u_approx.dot(&s_diag).dot(&vt_approx);

    let mut approximation_error = 0.0;
    for i in 0..low_rankmatrix.nrows() {
        for j in 0..low_rankmatrix.ncols() {
            let error = (low_rankmatrix[[i, j]] - approximation[[i, j]]).powi(2);
            approximation_error += error;
        }
    }
    approximation_error = approximation_error.sqrt();

    let matrix_norm = low_rankmatrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
    let relative_error = approximation_error / matrix_norm;

    println!("   Relative approximation error: {:.2e}", relative_error);
    println!("   āœ… Randomized SVD provides efficient low-rank approximation");

    // Test 7: Performance Scaling Analysis
    println!("\n7. PERFORMANCE SCALING ANALYSIS");
    println!("-------------------------------");

    let testsizes = vec![
        (500, 10, "Small tall"),
        (1000, 20, "Medium tall"),
        (2000, 40, "Large tall"),
        (10, 500, "Small fat"),
        (20, 1000, "Medium fat"),
        (40, 2000, "Large fat"),
    ];

    println!("   Scaling analysis for different matrix sizes:");
    println!("   Size        Aspect     Algorithm      Time (ms)  Complexity");

    for (m, n, _description) in testsizes {
        let testmatrix =
            Array2::from_shape_fn((m, n), |(i, j)| (i + j + 1) as f64 / (m + n) as f64);

        let start_time = Instant::now();
        let result = adaptive_decomposition(&testmatrix.view(), &config)?;
        let elapsed = start_time.elapsed();

        let aspect_str = match result.aspect_ratio {
            AspectRatio::TallSkinny => "Tall",
            AspectRatio::ShortFat => "Fat",
            AspectRatio::Square => "Square",
        };

        let algorithm_short = if result.algorithm_used.contains("TSQR") {
            "TSQR"
        } else if result.algorithm_used.contains("LQ") {
            "LQ"
        } else {
            "QR"
        };

        println!(
            "   {}Ɨ{:<4} {:<10} {:<14} {:>8.1}  {:>10}",
            m,
            n,
            aspect_str,
            algorithm_short,
            elapsed.as_nanos() as f64 / 1_000_000.0,
            result.complexity_estimate
        );
    }

    // Test 8: Big Data Applications Summary
    println!("\n8. BIG DATA APPLICATIONS");
    println!("------------------------");

    println!("   āœ… MACHINE LEARNING:");
    println!("      - Feature matrices in deep learning (tall-and-skinny)");
    println!("      - Mini-batch processing with TSQR");
    println!("      - Dimensionality reduction with randomized SVD");
    println!("      - Gradient computation in large-scale optimization");

    println!("   āœ… DATA SCIENCE:");
    println!("      - Time series analysis with tall feature vectors");
    println!("      - Principal component analysis on high-dimensional data");
    println!("      - Least squares regression with many samples");
    println!("      - Matrix completion and collaborative filtering");

    println!("   āœ… SCIENTIFIC COMPUTING:");
    println!("      - Compressed sensing and sparse recovery");
    println!("      - Genomics and bioinformatics (wide matrices)");
    println!("      - Climate modeling with spatial-temporal data");
    println!("      - Quantum chemistry and molecular dynamics");

    println!("   āœ… ENGINEERING:");
    println!("      - Signal processing and communications");
    println!("      - Computer vision and image processing");
    println!("      - Control systems and state estimation");
    println!("      - Finite element analysis and simulation");

    // Test 9: Algorithm Selection Guidelines
    println!("\n9. ALGORITHM SELECTION GUIDELINES");
    println!("---------------------------------");

    println!("   šŸ“Š ASPECT RATIO THRESHOLDS:");
    println!("      - Tall-and-skinny: height/width ≄ 4.0 → Use TSQR");
    println!("      - Short-and-fat: height/width ≤ 0.25 → Use LQ decomposition");
    println!("      - Square-ish: 0.25 < height/width < 4.0 → Use standard QR");

    println!("   šŸš€ PERFORMANCE OPTIMIZATIONS:");
    println!("      - Block size: 256-512 for cache efficiency");
    println!("      - TSQR: Optimal for m >> n, reduces communication O(n²)");
    println!("      - LQ: Natural for m << n, efficient least-norm solutions");
    println!("      - Randomized SVD: Oversampling 5-15 for numerical stability");

    println!("   šŸ’¾ MEMORY CONSIDERATIONS:");
    println!("      - TSQR: ~3mn memory for tall matrices");
    println!("      - LQ: ~2mn memory for fat matrices");
    println!("      - Blocked operations: Configurable memory footprint");
    println!("      - Randomized methods: Reduced memory for low-rank approximation");

    println!("\n========================================================");
    println!("šŸŽÆ Advanced ACHIEVEMENT: SCALABLE ALGORITHMS COMPLETE");
    println!("========================================================");
    println!("āœ… Tall-and-Skinny QR (TSQR): Communication-optimal O(n²) complexity");
    println!("āœ… LQ decomposition: Optimal for short-and-fat matrices");
    println!("āœ… Adaptive selection: Smart algorithm choice based on aspect ratio");
    println!("āœ… Blocked operations: Memory-efficient for massive matrices");
    println!("āœ… Randomized SVD: Probabilistic low-rank approximation");
    println!("āœ… Performance analytics: Comprehensive optimization metrics");
    println!("āœ… Big data ready: Designed for millions of rows/columns");
    println!("========================================================");

    Ok(())
}