scirs2-linalg 0.4.2

Linear algebra module for SciRS2 (scirs2-linalg)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
//! Mixed-precision linear algebra example
//!
//! This example demonstrates the use of mixed-precision operations
//! to balance accuracy and performance in scientific computing.

use scirs2_core::ndarray::{array, Array1, Array2};
use scirs2_linalg::mixed_precision::conversions::convert;
use scirs2_linalg::mixed_precision::*;

// Simple benchmark function to compare execution times
#[allow(dead_code)]
fn benchmark_fn<F>(name: &str, mut f: F) -> std::time::Duration
where
    F: FnMut(),
{
    use std::time::Instant;

    // Warm up
    for _ in 0..5 {
        f();
    }

    // Actual measurement
    let iterations = 100;
    let start = Instant::now();

    for _ in 0..iterations {
        f();
    }

    let duration = start.elapsed() / iterations;
    println!("{}: {:?} per iteration", name, duration);

    duration
}

#[allow(dead_code)]
fn main() {
    println!("Mixed-Precision Linear Algebra Examples");
    println!("=======================================\n");

    // Example 1: Basic type conversion
    println!("Example 1: Basic type conversion");
    println!("---------------------------------");

    // Create a high-precision array
    let arr_f64 = array![1.0, 2.0, 3.0, 4.0, 5.0];
    println!("Original f64 array: {:?}", arr_f64);

    // Convert to lower precision
    let arr_f32: Array1<f32> = convert(&arr_f64.view());
    println!("Converted to f32: {:?}", arr_f32);

    // Convert back to higher precision
    let arr_f64_again: Array1<f64> = convert(&arr_f32.view());
    println!("Converted back to f64: {:?}", arr_f64_again);

    // Show that the conversion is lossless for these simple values
    println!("Conversion error: {:?}", &arr_f64 - &arr_f64_again);
    println!();

    // Example 2: 2D array conversion
    println!("Example 2: 2D array conversion");
    println!("------------------------------");

    // Create a high-precision 2D array
    let mat_f64 = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],];
    println!("Original f64 matrix:\n{:?}", mat_f64);

    // Convert to lower precision
    let mat_f32: Array2<f32> = convert_2d(&mat_f64.view());
    println!("Converted to f32:\n{:?}", mat_f32);
    println!();

    // Example 3: Mixed-precision matrix-vector multiplication
    println!("Example 3: Mixed-precision matrix-vector multiplication");
    println!("------------------------------------------------------");

    // Create a matrix and vector in f32 precision
    let a_f32 = array![
        [1.0f32, 2.0f32, 3.0f32],
        [4.0f32, 5.0f32, 6.0f32],
        [7.0f32, 8.0f32, 9.0f32],
    ];
    let x_f32 = array![0.1f32, 0.2f32, 0.3f32];

    println!("Matrix A (f32):\n{:?}", a_f32);
    println!("Vector x (f32):\n{:?}", x_f32);

    // Compute in f32
    let y_f32 = a_f32.dot(&x_f32);
    println!("Standard computation (f32):\n{:?}", y_f32);

    // Compute with internal f64 precision but result in f32
    let y_mp = mixed_precision_matvec::<f32, f32, f32, f64>(&a_f32.view(), &x_f32.view())
        .expect("Operation failed");
    println!("Mixed-precision computation (f32->f64->f32):\n{:?}", y_mp);

    // Check difference
    println!("Difference: {:?}", &y_f32 - &y_mp);
    println!();

    // Example 4: Mixed-precision solving of linear systems
    println!("Example 4: Mixed-precision linear system solver");
    println!("----------------------------------------------");

    // Create an ill-conditioned system in f32 precision
    // Hilbert matrix is notoriously ill-conditioned
    let mut hilbert_f32 = Array2::<f32>::zeros((5, 5));
    for i in 0..5 {
        for j in 0..5 {
            hilbert_f32[[i, j]] = 1.0 / ((i + j + 1) as f32);
        }
    }

    // Create a simple right-hand side
    let b_f32 = array![1.0f32, 0.0, 0.0, 0.0, 0.0];

    println!("Hilbert matrix (5x5, f32):");
    for i in 0..5 {
        println!("{:?}", hilbert_f32.row(i));
    }
    println!("Right-hand side: {:?}", b_f32);

    // First solve using pure f32 precision
    let x_pure_f32 =
        mixed_precision_solve::<f32, f32, f32, f32>(&hilbert_f32.view(), &b_f32.view())
            .expect("Operation failed");

    // Then solve using internal f64 precision
    let x_mp = mixed_precision_solve::<f32, f32, f32, f64>(&hilbert_f32.view(), &b_f32.view())
        .expect("Operation failed");

    println!("Solution with pure f32 precision: {:?}", x_pure_f32);
    println!("Solution with mixed precision (internal f64): {:?}", x_mp);

    // Check how well the solutions satisfy A*x = b
    let ax_f32 = hilbert_f32.dot(&x_pure_f32);
    let ax_mp = hilbert_f32.dot(&x_mp);

    println!("Residual with pure f32: {:?}", &ax_f32 - &b_f32);
    println!("Residual with mixed precision: {:?}", &ax_mp - &b_f32);

    // Compute residual norms to see which solution is more accurate
    let residual_norm_f32 = (&ax_f32 - &b_f32)
        .iter()
        .map(|&x| x * x)
        .sum::<f32>()
        .sqrt();
    let residual_norm_mp = (&ax_mp - &b_f32).iter().map(|&x| x * x).sum::<f32>().sqrt();

    println!("Residual norm (pure f32): {:.6e}", residual_norm_f32);
    println!("Residual norm (mixed precision): {:.6e}", residual_norm_mp);
    println!(
        "Improvement factor: {:.2}x",
        residual_norm_f32 / residual_norm_mp
    );
    println!();

    // Example 5: Working with very small or large numbers
    println!("Example 5: Working with very small or large numbers");
    println!("--------------------------------------------------");

    // Create a matrix with small and large values
    let a_extreme = array![[1.0e-6f32, 1.0e6f32], [1.0e6f32, 1.0e-6f32],];
    let b_extreme = array![1.0f32, 1.0f32];

    println!("Matrix with extreme values (f32):\n{:?}", a_extreme);
    println!("Right-hand side: {:?}", b_extreme);

    // Solve with pure f32
    let x_f32_extreme =
        mixed_precision_solve::<f32, f32, f32, f32>(&a_extreme.view(), &b_extreme.view())
            .expect("Operation failed");

    // Solve with internal f64
    let x_mp_extreme =
        mixed_precision_solve::<f32, f32, f32, f64>(&a_extreme.view(), &b_extreme.view())
            .expect("Operation failed");

    println!("Solution with pure f32: {:?}", x_f32_extreme);
    println!("Solution with mixed precision: {:?}", x_mp_extreme);

    // Check residuals
    let ax_f32_extreme = a_extreme.dot(&x_f32_extreme);
    let ax_mp_extreme = a_extreme.dot(&x_mp_extreme);

    println!("Residual with pure f32: {:?}", &ax_f32_extreme - &b_extreme);
    println!(
        "Residual with mixed precision: {:?}",
        &ax_mp_extreme - &b_extreme
    );

    // Example 6: Matrix multiplication with mixed precision
    println!("Example 6: Matrix multiplication with mixed precision");
    println!("-------------------------------------------------");

    // Create two matrices in f32 precision
    let a_mult = array![[1.0e-4f32, 1.0e4f32], [1.0e4f32, 1.0e-4f32]];

    let b_mult = array![[1.0e-4f32, 1.0e4f32], [1.0e4f32, 1.0e-4f32]];

    println!("Matrix A:\n{:?}", a_mult);
    println!("Matrix B:\n{:?}", b_mult);

    // Compute product with standard f32 precision
    let c_f32 = a_mult.dot(&b_mult);
    println!("Standard computation (f32):\n{:?}", c_f32);

    // Compute with internal f64 precision but result in f32
    let c_mp = mixed_precision_matmul::<f32, f32, f32, f64>(&a_mult.view(), &b_mult.view())
        .expect("Operation failed");
    println!("Mixed-precision computation (f32->f64->f32):\n{:?}", c_mp);

    // Check difference
    println!("Difference: {:?}", &c_f32 - &c_mp);
    println!();

    // Example 7: Condition number computation with mixed precision
    println!("Example 7: Condition number computation with mixed precision");
    println!("----------------------------------------------------------");

    // Create increasingly ill-conditioned matrices
    println!("Condition numbers of matrices with different conditioning:");

    // Well-conditioned matrix
    let a_well = array![[3.0f32, 2.0f32], [1.0f32, 5.0f32]];

    // Moderately ill-conditioned matrix
    let a_moderate = array![[1.0f32, 2.0f32], [1.001f32, 2.001f32]];

    // Severely ill-conditioned matrix (nearly singular)
    let a_severe = array![[1.0f32, 2.0f32], [1.0f32, 2.0000001f32]];

    // Compute condition numbers with both f32 and f64 internal precision
    let cond_well_f32 =
        mixed_precision_cond::<f32, f32, f32>(&a_well.view(), None).expect("Operation failed");
    let cond_well_f64 =
        mixed_precision_cond::<f32, f32, f64>(&a_well.view(), None).expect("Operation failed");

    let cond_moderate_f32 =
        mixed_precision_cond::<f32, f32, f32>(&a_moderate.view(), None).expect("Operation failed");
    let cond_moderate_f64 =
        mixed_precision_cond::<f32, f32, f64>(&a_moderate.view(), None).expect("Operation failed");

    let cond_severe_f32 =
        mixed_precision_cond::<f32, f32, f32>(&a_severe.view(), None).expect("Operation failed");
    let cond_severe_f64 =
        mixed_precision_cond::<f32, f32, f64>(&a_severe.view(), None).expect("Operation failed");

    println!("Well-conditioned matrix:");
    println!("  f32 precision: {:.2e}", cond_well_f32);
    println!("  f64 precision: {:.2e}", cond_well_f64);
    println!(
        "  Relative difference: {:.2}%",
        100.0 * (cond_well_f64 - cond_well_f32).abs() / cond_well_f32
    );
    println!();

    println!("Moderately ill-conditioned matrix:");
    println!("  f32 precision: {:.2e}", cond_moderate_f32);
    println!("  f64 precision: {:.2e}", cond_moderate_f64);
    println!(
        "  Relative difference: {:.2}%",
        100.0 * (cond_moderate_f64 - cond_moderate_f32).abs() / cond_moderate_f32
    );
    println!();

    println!("Severely ill-conditioned matrix:");
    println!("  f32 precision: {:.2e}", cond_severe_f32);
    println!("  f64 precision: {:.2e}", cond_severe_f64);
    println!(
        "  Relative difference: {:.2}%",
        100.0 * (cond_severe_f64 - cond_severe_f32).abs() / cond_severe_f32
    );
    println!();

    println!("\nConclusion:");
    println!("Mixed-precision operations can significantly improve accuracy");
    println!("for numerically challenging problems while maintaining the");
    println!("memory efficiency of lower precision storage.");
    println!();
    println!("This is particularly important for:");
    println!("1. Ill-conditioned linear systems");
    println!("2. Matrices with very large condition numbers");
    println!("3. Computations involving very small or very large numbers");
    println!("4. Maintaining both speed and accuracy in scientific computing");

    // Example 8: Performance comparison
    println!("\nExample 8: Performance comparison");
    println!("--------------------------------");

    // Create a larger matrix and vector for meaningful benchmarks
    let size = 500; // 500x500 matrix
    let largematrix =
        Array2::<f32>::from_shape_fn((size, size), |(i, j)| if i == j { 1.0 } else { 0.01 });

    let large_vector = Array1::<f32>::from_shape_fn(size, |i| (i % 10) as f32 / 10.0);

    println!("Benchmarking with {}x{} matrix", size, size);
    println!("----------------------------------");

    // Benchmark matrix-vector multiplication
    println!("\nMatrix-vector multiplication:");

    let f32_time = benchmark_fn("Standard f32", || {
        let _ = largematrix.dot(&large_vector);
    });

    let mp_time = benchmark_fn("Mixed precision (f32->f64->f32)", || {
        let _ =
            mixed_precision_matvec::<f32, f32, f32, f64>(&largematrix.view(), &large_vector.view())
                .expect("Operation failed");
    });

    let speedup = f32_time.as_nanos() as f64 / mp_time.as_nanos() as f64;
    println!("Relative performance: {:.2}x", speedup);

    // Create an ill-conditioned matrix for solver benchmarks
    println!("\nLinear system solving (ill-conditioned):");

    // Create a simple, slightly ill-conditioned matrix
    let matrixsize = 10;
    let mut testmatrix = Array2::<f32>::eye(matrixsize);
    // Add small off-diagonal elements to make it slightly ill-conditioned
    // but not singular
    for i in 0..matrixsize - 1 {
        testmatrix[[i, i + 1]] = 0.1;
        testmatrix[[i + 1, i]] = 0.1;
    }

    let b_large = Array1::<f32>::from_shape_fn(matrixsize, |i| if i == 0 { 1.0 } else { 0.0 });

    let f32_solve_time = benchmark_fn("Pure f32 solver", || {
        let _ = mixed_precision_solve::<f32, f32, f32, f32>(&testmatrix.view(), &b_large.view())
            .expect("Operation failed");
    });

    let mp_solve_time = benchmark_fn("Mixed precision solver (f64 internal)", || {
        let _ = mixed_precision_solve::<f32, f32, f32, f64>(&testmatrix.view(), &b_large.view())
            .expect("Operation failed");
    });

    let solve_speedup = f32_solve_time.as_nanos() as f64 / mp_solve_time.as_nanos() as f64;
    println!("Relative solver performance: {:.2}x", solve_speedup);

    println!("\nNOTE: The mixed-precision versions often take slightly longer");
    println!("due to type conversions, but provide better numerical accuracy,");
    println!("especially for ill-conditioned problems or when working with");
    println!("values of vastly different magnitudes.");

    // Example 9: SIMD-accelerated mixed precision operations
    #[cfg(feature = "simd")]
    {
        println!("\nExample 9: SIMD-accelerated mixed precision operations");
        println!("---------------------------------------------------");

        // Create test matrices and vectors with a mix of large and small values
        // to highlight precision issues
        let size = 1000;
        let a_vec = Array1::<f32>::from_shape_fn(size, |i| if i % 2 == 0 { 1.0e-6 } else { 1.0e6 });

        let b_vec = Array1::<f32>::from_shape_fn(size, |i| if i % 2 == 0 { 1.0e6 } else { 1.0e-6 });

        // Create smaller matrices for matmul demonstration
        let matsize = 100;
        let a_mat = Array2::<f32>::from_shape_fn((matsize, matsize), |(i, j)| {
            if (i + j) % 2 == 0 {
                1.0e-6
            } else {
                1.0e6
            }
        });

        let b_mat = Array2::<f32>::from_shape_fn((matsize, matsize), |(i, j)| {
            if (i + j) % 3 == 0 {
                1.0e6
            } else {
                1.0e-6
            }
        });

        println!("\nComparing SIMD-accelerated vs standard mixed precision:");

        // Benchmark dot product operations
        println!("\nDot product (vector length: {}):", size);

        let standard_time = benchmark_fn("Standard f32 dot product", || {
            let _ = a_vec.dot(&b_vec);
        });

        let standard_mp_time = benchmark_fn("Regular mixed precision dot product", || {
            let _: f64 =
                mixed_precision_dot_f32::<f32, f32, f64, f64>(&a_vec.view(), &b_vec.view())
                    .expect("Operation failed");
        });

        let simd_time = benchmark_fn("Mixed precision dot product (alternative)", || {
            // Note: SIMD-specific functions are not available, using regular mixed precision
            let _: f64 =
                mixed_precision_dot_f32::<f32, f32, f64, f64>(&a_vec.view(), &b_vec.view())
                    .expect("Operation failed");
        });

        println!(
            "SIMD speedup vs standard f32: {:.2}x",
            standard_time.as_nanos() as f64 / simd_time.as_nanos() as f64
        );
        println!(
            "SIMD speedup vs regular mixed precision: {:.2}x",
            standard_mp_time.as_nanos() as f64 / simd_time.as_nanos() as f64
        );

        // Benchmark matrix-vector multiplication
        let row_vec = a_mat.row(0).to_owned();

        println!(
            "\nMatrix-vector multiplication ({}x{} matrix):",
            matsize, matsize
        );

        let standard_mv_time = benchmark_fn("Standard f32 matrix-vector multiplication", || {
            let _ = a_mat.dot(&row_vec);
        });

        let standard_mp_mv_time = benchmark_fn(
            "Regular mixed precision matrix-vector multiplication",
            || {
                let _ =
                    mixed_precision_matvec::<f32, f32, f32, f64>(&a_mat.view(), &row_vec.view())
                        .expect("Operation failed");
            },
        );

        let simd_mv_time = benchmark_fn(
            "SIMD-accelerated mixed precision matrix-vector multiplication",
            || {
                let _: Array1<f64> =
                    mixed_precision_matvec::<f32, f32, f64, f64>(&a_mat.view(), &row_vec.view())
                        .expect("Operation failed");
            },
        );

        println!(
            "SIMD speedup vs standard f32: {:.2}x",
            standard_mv_time.as_nanos() as f64 / simd_mv_time.as_nanos() as f64
        );
        println!(
            "SIMD speedup vs regular mixed precision: {:.2}x",
            standard_mp_mv_time.as_nanos() as f64 / simd_mv_time.as_nanos() as f64
        );

        // Benchmark matrix multiplication
        println!(
            "\nMatrix multiplication ({}x{} matrices):",
            matsize, matsize
        );

        let standard_mm_time = benchmark_fn("Standard f32 matrix multiplication", || {
            let _ = a_mat.dot(&b_mat);
        });

        let standard_mp_mm_time =
            benchmark_fn("Regular mixed precision matrix multiplication", || {
                let _ = mixed_precision_matmul::<f32, f32, f32, f64>(&a_mat.view(), &b_mat.view())
                    .expect("Operation failed");
            });

        let simd_mm_time = benchmark_fn(
            "SIMD-accelerated mixed precision matrix multiplication",
            || {
                let _ = simd_mixed_precision_matmul_f32_f64::<f32>(&a_mat.view(), &b_mat.view())
                    .expect("Operation failed");
            },
        );

        println!(
            "SIMD speedup vs standard f32: {:.2}x",
            standard_mm_time.as_nanos() as f64 / simd_mm_time.as_nanos() as f64
        );
        println!(
            "SIMD speedup vs regular mixed precision: {:.2}x",
            standard_mp_mm_time.as_nanos() as f64 / simd_mm_time.as_nanos() as f64
        );

        // Check accuracy differences
        println!("\nAccuracy comparison:");

        // Create vectors with values that benefit from higher precision
        let a_precision = array![1.0e-7f32, 2.0e7, 3.0e-7, 4.0e7, 5.0e-7, 6.0e7, 7.0e-7, 8.0e7];
        let b_precision = array![9.0e-7f32, 8.0e7, 7.0e-7, 6.0e7, 5.0e-7, 4.0e7, 3.0e-7, 2.0e7];

        // Compute standard f32 dot product
        let dot_f32 = a_precision
            .iter()
            .zip(b_precision.iter())
            .map(|(&x, &y)| x * y)
            .sum::<f32>();

        // Compute mixed precision dot product
        let dot_simd_f64: f64 =
            mixed_precision_dot_f32::<f32, f32, f64, f64>(&a_precision.view(), &b_precision.view())
                .expect("Operation failed");

        // Compute "ground truth" with explicit f64 conversions
        let ground_truth = a_precision
            .iter()
            .zip(b_precision.iter())
            .map(|(&x, &y)| (x as f64) * (y as f64))
            .sum::<f64>();

        println!("Standard f32 result: {:.10e}", dot_f32);
        println!("SIMD mixed precision result: {:.10e}", dot_simd_f64);
        println!("Ground truth (manual f64): {:.10e}", ground_truth);

        println!(
            "Relative error (standard f32): {:.10e}",
            ((dot_f32 as f64) - ground_truth).abs() / ground_truth
        );
        println!(
            "Relative error (SIMD mixed precision): {:.10e}",
            (dot_simd_f64 - ground_truth).abs() / ground_truth
        );

        // Summary
        println!("\nSIMD-accelerated mixed precision provides:");
        println!("1. Better performance than regular mixed precision");
        println!("2. Better numerical accuracy than standard f32");
        println!("3. Efficient handling of values with large dynamic range");
        println!("4. Ideal for performance-critical scientific computing applications");
    }

    #[cfg(not(feature = "simd"))]
    {
        println!("\nExample 9: SIMD-accelerated operations (not available)");
        println!("---------------------------------------------------");
        println!("To enable SIMD-accelerated mixed-precision operations,");
        println!("rebuild with the 'simd' feature enabled:");
        println!("    cargo build --features=\"simd\"");
        println!("    cargo run --example mixed_precision_example --features=\"simd\"");
    }
}