use scirs2_core::ndarray::{Array1, Array2};
use scirs2_linalg::scalable::{
adaptive_decomposition, blocked_matmul, classify_aspect_ratio, lq_decomposition,
randomized_svd, tsqr, AspectRatio, ScalableConfig,
};
use std::time::Instant;
#[allow(dead_code)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("š SCALABLE ALGORITHMS - Advanced DEMONSTRATION");
println!("=================================================");
println!("\n1. ASPECT RATIO CLASSIFICATION");
println!("------------------------------");
let aspect_examples = vec![
(1000, 50, "Machine Learning Features"),
(20, 800, "Compressed Sensing"),
(200, 180, "Nearly Square Matrix"),
(5000, 10, "Time Series Data"),
(5, 2000, "Genomics Data"),
];
for (m, n, description) in aspect_examples {
let dummymatrix = Array2::<f64>::zeros((m, n));
let aspect = classify_aspect_ratio(&dummymatrix.view(), 4.0);
println!(" {} ({}Ć{}): {:?}", description, m, n, aspect);
}
println!("\n2. TALL-AND-SKINNY QR (TSQR): Communication-Optimal Decomposition");
println!("------------------------------------------------------------------");
let m_tall = 2000;
let n_tall = 25;
let tallmatrix = Array2::from_shape_fn((m_tall, n_tall), |(i, j)| {
let freq1 = 2.0 * std::f64::consts::PI * (i as f64) / 100.0;
let freq2 = 2.0 * std::f64::consts::PI * (j as f64) / 10.0;
freq1.sin() + 0.5 * freq2.cos() + 0.1 * (i + j) as f64 / 1000.0
});
println!(
" Matrix dimensions: {}Ć{} (aspect ratio: {:.1})",
m_tall,
n_tall,
m_tall as f64 / n_tall as f64
);
let config = ScalableConfig::default().with_blocksize(512);
let start_time = Instant::now();
let (q, r) = tsqr(&tallmatrix.view(), &config)?;
let tsqr_time = start_time.elapsed();
println!(
" TSQR computation time: {:.2}ms",
tsqr_time.as_nanos() as f64 / 1_000_000.0
);
println!(" Q dimensions: {}Ć{}", q.nrows(), q.ncols());
println!(" R dimensions: {}Ć{}", r.nrows(), r.ncols());
let qtq = q.t().dot(&q);
let identity = Array2::<f64>::eye(n_tall);
let mut max_orthogonality_error = 0.0;
for i in 0..n_tall {
for j in 0..n_tall {
let error = (qtq[[i, j]] - identity[[i, j]]).abs();
if error > max_orthogonality_error {
max_orthogonality_error = error;
}
}
}
println!(" Orthogonality error: {:.2e}", max_orthogonality_error);
let reconstructed = q.dot(&r);
let mut max_reconstruction_error = 0.0;
for i in 0..m_tall.min(100) {
for j in 0..n_tall {
let error = (tallmatrix[[i, j]] - reconstructed[[i, j]]).abs();
if error > max_reconstruction_error {
max_reconstruction_error = error;
}
}
}
println!(" Reconstruction error: {:.2e}", max_reconstruction_error);
println!(" ā
TSQR provides communication-optimal O(n²) complexity");
println!("\n3. LQ DECOMPOSITION: Optimal for Short-and-Fat Matrices");
println!("-------------------------------------------------------");
let m_short = 20;
let n_short = 1000;
let shortmatrix = Array2::from_shape_fn((m_short, n_short), |(i, j)| {
let signal = (j as f64 / 50.0).sin() * (i as f64 + 1.0);
signal + 0.01 * (i * j) as f64 / 10000.0
});
println!(
" Matrix dimensions: {}Ć{} (aspect ratio: {:.1})",
m_short,
n_short,
m_short as f64 / n_short as f64
);
let start_time = Instant::now();
let (l, q) = lq_decomposition(&shortmatrix.view(), &config)?;
let lq_time = start_time.elapsed();
println!(
" LQ computation time: {:.2}ms",
lq_time.as_nanos() as f64 / 1_000_000.0
);
println!(" L dimensions: {}Ć{}", l.nrows(), l.ncols());
println!(" Q dimensions: {}Ć{}", q.nrows(), q.ncols());
let mut max_upper_triangular = 0.0;
for i in 0..m_short {
for j in (i + 1)..m_short {
let val = l[[i, j]].abs();
if val > max_upper_triangular {
max_upper_triangular = val;
}
}
}
println!(" Lower triangular error: {:.2e}", max_upper_triangular);
let lq_reconstructed = l.dot(&q);
let mut max_lq_error = 0.0;
for i in 0..m_short {
for j in 0..n_short.min(100) {
let error = (shortmatrix[[i, j]] - lq_reconstructed[[i, j]]).abs();
if error > max_lq_error {
max_lq_error = error;
}
}
}
println!(" Reconstruction error: {:.2e}", max_lq_error);
println!(" ā
LQ decomposition optimal for overdetermined systems");
println!("\n4. ADAPTIVE ALGORITHM SELECTION: Smart Optimization");
println!("---------------------------------------------------");
let test_matrices = vec![
(
"Tall Feature Matrix",
Array2::from_shape_fn((800, 15), |(i, j)| (i + j + 1) as f64),
),
(
"Short Genomics Matrix",
Array2::from_shape_fn((12, 600), |(i, j)| (i * j + 1) as f64),
),
(
"Square-ish Matrix",
Array2::from_shape_fn((120, 100), |(i, j)| (i + j + 1) as f64),
),
];
for (description, matrix) in test_matrices {
println!("\n Testing: {}", description);
let (m, n) = matrix.dim();
let result = adaptive_decomposition(&matrix.view(), &config)?;
println!(" Detected aspect ratio: {:?}", result.aspect_ratio);
println!(" Selected algorithm: {}", result.algorithm_used);
println!(
" Complexity estimate: {} FLOPs",
result.complexity_estimate
);
println!(
" Memory estimate: {:.1} KB",
result.memory_estimate as f64 / 1024.0
);
let metrics = &result.performance_metrics;
println!(
" Communication volume: {} elements",
metrics.communication_volume
);
println!(
" Cache efficiency: {:.1}%",
metrics.cache_efficiency * 100.0
);
println!(
" Memory bandwidth: {:.1} MB/s",
metrics.memory_bandwidth
);
let reconstruction = result.factor1.dot(&result.factor2);
let mut max_error = 0.0;
for i in 0..m.min(50) {
for j in 0..n.min(50) {
let error = (matrix[[i, j]] - reconstruction[[i, j]]).abs();
if error > max_error {
max_error = error;
}
}
}
println!(" Reconstruction error: {:.2e}", max_error);
}
println!("\n5. BLOCKED MATRIX MULTIPLICATION: Memory-Efficient Operations");
println!("------------------------------------------------------------");
let asize = (400, 200);
let bsize = (200, 300);
let matrix_a = Array2::from_shape_fn(asize, |(i, j)| ((i + j + 1) as f64).sin() / 100.0);
let matrix_b = Array2::from_shape_fn(bsize, |(i, j)| ((i * j + 1) as f64).cos() / 100.0);
println!(
" Matrix A: {}Ć{}, Matrix B: {}Ć{}",
asize.0, asize.1, bsize.0, bsize.1
);
let config_small_blocks = config.clone().with_blocksize(64);
let start_time = Instant::now();
let result_blocked = blocked_matmul(&matrix_a.view(), &matrix_b.view(), &config_small_blocks)?;
let blocked_time = start_time.elapsed();
let start_time = Instant::now();
let result_standard = matrix_a.dot(&matrix_b);
let standard_time = start_time.elapsed();
println!(
" Blocked multiplication time: {:.2}ms",
blocked_time.as_nanos() as f64 / 1_000_000.0
);
println!(
" Standard multiplication time: {:.2}ms",
standard_time.as_nanos() as f64 / 1_000_000.0
);
let mut max_diff = 0.0;
for i in 0..asize.0 {
for j in 0..bsize.1 {
let diff = (result_blocked[[i, j]] - result_standard[[i, j]]).abs();
if diff > max_diff {
max_diff = diff;
}
}
}
println!(" Maximum difference: {:.2e}", max_diff);
println!(" ā
Blocked algorithm provides same results with better cache efficiency");
println!("\n6. RANDOMIZED SVD: Efficient Low-Rank Approximation");
println!("---------------------------------------------------");
let rank_true = 10;
let u_true =
Array2::from_shape_fn((300, rank_true), |(i, j)| ((i + j + 1) as f64).sin() / 10.0);
let s_true = Array1::from_shape_fn(rank_true, |i| {
10.0 * (-(i as f64) / 2.0).exp() });
let vt_true =
Array2::from_shape_fn((rank_true, 200), |(i, j)| ((i * j + 1) as f64).cos() / 10.0);
let smatrix = Array2::from_diag(&s_true);
let us = u_true.dot(&smatrix);
let low_rankmatrix = us.dot(&vt_true);
println!(
" Original matrix: {}Ć{} (true rank: {})",
low_rankmatrix.nrows(),
low_rankmatrix.ncols(),
rank_true
);
let target_rank = 8;
let config_randomized = config.clone().with_oversampling(4);
let start_time = Instant::now();
let (u_approx, s_approx, vt_approx) =
randomized_svd(&low_rankmatrix.view(), target_rank, &config_randomized)?;
let randomized_time = start_time.elapsed();
println!(
" Randomized SVD time: {:.2}ms",
randomized_time.as_nanos() as f64 / 1_000_000.0
);
println!(" Approximation rank: {}", target_rank);
println!(
" Computed singular values: {:?}",
s_approx
.iter()
.take(5)
.map(|&x| format!("{:.3}", x))
.collect::<Vec<_>>()
);
let s_diag = Array2::from_diag(&s_approx);
let approximation = u_approx.dot(&s_diag).dot(&vt_approx);
let mut approximation_error = 0.0;
for i in 0..low_rankmatrix.nrows() {
for j in 0..low_rankmatrix.ncols() {
let error = (low_rankmatrix[[i, j]] - approximation[[i, j]]).powi(2);
approximation_error += error;
}
}
approximation_error = approximation_error.sqrt();
let matrix_norm = low_rankmatrix.iter().map(|&x| x * x).sum::<f64>().sqrt();
let relative_error = approximation_error / matrix_norm;
println!(" Relative approximation error: {:.2e}", relative_error);
println!(" ā
Randomized SVD provides efficient low-rank approximation");
println!("\n7. PERFORMANCE SCALING ANALYSIS");
println!("-------------------------------");
let testsizes = vec![
(500, 10, "Small tall"),
(1000, 20, "Medium tall"),
(2000, 40, "Large tall"),
(10, 500, "Small fat"),
(20, 1000, "Medium fat"),
(40, 2000, "Large fat"),
];
println!(" Scaling analysis for different matrix sizes:");
println!(" Size Aspect Algorithm Time (ms) Complexity");
for (m, n, _description) in testsizes {
let testmatrix =
Array2::from_shape_fn((m, n), |(i, j)| (i + j + 1) as f64 / (m + n) as f64);
let start_time = Instant::now();
let result = adaptive_decomposition(&testmatrix.view(), &config)?;
let elapsed = start_time.elapsed();
let aspect_str = match result.aspect_ratio {
AspectRatio::TallSkinny => "Tall",
AspectRatio::ShortFat => "Fat",
AspectRatio::Square => "Square",
};
let algorithm_short = if result.algorithm_used.contains("TSQR") {
"TSQR"
} else if result.algorithm_used.contains("LQ") {
"LQ"
} else {
"QR"
};
println!(
" {}Ć{:<4} {:<10} {:<14} {:>8.1} {:>10}",
m,
n,
aspect_str,
algorithm_short,
elapsed.as_nanos() as f64 / 1_000_000.0,
result.complexity_estimate
);
}
println!("\n8. BIG DATA APPLICATIONS");
println!("------------------------");
println!(" ā
MACHINE LEARNING:");
println!(" - Feature matrices in deep learning (tall-and-skinny)");
println!(" - Mini-batch processing with TSQR");
println!(" - Dimensionality reduction with randomized SVD");
println!(" - Gradient computation in large-scale optimization");
println!(" ā
DATA SCIENCE:");
println!(" - Time series analysis with tall feature vectors");
println!(" - Principal component analysis on high-dimensional data");
println!(" - Least squares regression with many samples");
println!(" - Matrix completion and collaborative filtering");
println!(" ā
SCIENTIFIC COMPUTING:");
println!(" - Compressed sensing and sparse recovery");
println!(" - Genomics and bioinformatics (wide matrices)");
println!(" - Climate modeling with spatial-temporal data");
println!(" - Quantum chemistry and molecular dynamics");
println!(" ā
ENGINEERING:");
println!(" - Signal processing and communications");
println!(" - Computer vision and image processing");
println!(" - Control systems and state estimation");
println!(" - Finite element analysis and simulation");
println!("\n9. ALGORITHM SELECTION GUIDELINES");
println!("---------------------------------");
println!(" š ASPECT RATIO THRESHOLDS:");
println!(" - Tall-and-skinny: height/width ā„ 4.0 ā Use TSQR");
println!(" - Short-and-fat: height/width ⤠0.25 ā Use LQ decomposition");
println!(" - Square-ish: 0.25 < height/width < 4.0 ā Use standard QR");
println!(" š PERFORMANCE OPTIMIZATIONS:");
println!(" - Block size: 256-512 for cache efficiency");
println!(" - TSQR: Optimal for m >> n, reduces communication O(n²)");
println!(" - LQ: Natural for m << n, efficient least-norm solutions");
println!(" - Randomized SVD: Oversampling 5-15 for numerical stability");
println!(" š¾ MEMORY CONSIDERATIONS:");
println!(" - TSQR: ~3mn memory for tall matrices");
println!(" - LQ: ~2mn memory for fat matrices");
println!(" - Blocked operations: Configurable memory footprint");
println!(" - Randomized methods: Reduced memory for low-rank approximation");
println!("\n========================================================");
println!("šÆ Advanced ACHIEVEMENT: SCALABLE ALGORITHMS COMPLETE");
println!("========================================================");
println!("ā
Tall-and-Skinny QR (TSQR): Communication-optimal O(n²) complexity");
println!("ā
LQ decomposition: Optimal for short-and-fat matrices");
println!("ā
Adaptive selection: Smart algorithm choice based on aspect ratio");
println!("ā
Blocked operations: Memory-efficient for massive matrices");
println!("ā
Randomized SVD: Probabilistic low-rank approximation");
println!("ā
Performance analytics: Comprehensive optimization metrics");
println!("ā
Big data ready: Designed for millions of rows/columns");
println!("========================================================");
Ok(())
}