use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
use scirs2_core::numeric::{Float, NumAssign, One, Zero};
use scirs2_core::random as rand;
use std::iter::Sum;
use crate::decomposition::{qr, svd};
use crate::error::{LinalgError, LinalgResult};
use crate::parallel::WorkerConfig;
/// Shape classification of a matrix, used to dispatch to the decomposition
/// algorithm best suited to its dimensions.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq`: the enum is
/// fieldless, so both are free and let the variants be used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AspectRatio {
    /// rows/cols ratio is above the configured threshold.
    TallSkinny,
    /// rows/cols ratio is below the reciprocal of the threshold.
    ShortFat,
    /// Everything in between (roughly square).
    Square,
}
/// Tuning parameters shared by the scalable decomposition routines in this module.
#[derive(Debug, Clone)]
pub struct ScalableConfig {
/// Rows/cols ratio above which a matrix is classified tall-and-skinny
/// (and below whose reciprocal it is short-and-fat); see `classify_aspect_ratio`.
pub aspect_threshold: f64,
/// Tile edge length used by blocked kernels such as `blocked_matmul`.
pub blocksize: usize,
/// Extra sample columns drawn beyond the target rank in `randomized_svd`.
pub oversampling: usize,
/// Power-iteration count used by `randomized_svd` to sharpen the spectrum.
pub power_iterations: usize,
/// Worker/threading settings. NOTE(review): not consulted by any routine
/// visible in this file — presumably forwarded to parallel kernels elsewhere;
/// TODO confirm.
pub workers: WorkerConfig,
}
impl Default for ScalableConfig {
/// Defaults: threshold 4.0, 128-wide tiles, 10 oversampling columns,
/// 2 power iterations, and the default worker settings.
fn default() -> Self {
Self {
aspect_threshold: 4.0,
blocksize: 128,
oversampling: 10,
power_iterations: 2,
workers: WorkerConfig::default(),
}
}
}
impl ScalableConfig {
    /// Create a configuration with default settings (equivalent to `Default::default`).
    pub fn new() -> Self {
        Self::default()
    }
    /// Set the aspect-ratio threshold used to classify matrix shapes.
    pub fn with_threshold(mut self, threshold: f64) -> Self {
        self.aspect_threshold = threshold;
        self
    }
    /// Set the tile edge length used by blocked kernels.
    pub fn with_blocksize(mut self, blocksize: usize) -> Self {
        self.blocksize = blocksize;
        self
    }
    /// Set the oversampling column count for randomized methods.
    pub fn with_oversampling(mut self, oversampling: usize) -> Self {
        self.oversampling = oversampling;
        self
    }
    /// Set the number of power iterations for randomized methods.
    // Fix: parameter renamed `poweriterations` -> `power_iterations` (snake_case);
    // Rust calls are positional, so callers are unaffected.
    pub fn with_power_iterations(mut self, power_iterations: usize) -> Self {
        self.power_iterations = power_iterations;
        self
    }
    /// Replace the worker/threading configuration.
    pub fn with_workers(mut self, workers: WorkerConfig) -> Self {
        self.workers = workers;
        self
    }
}
/// Classify a matrix by its rows/cols ratio against `threshold`:
/// above it → `TallSkinny`, below its reciprocal → `ShortFat`,
/// otherwise → `Square`.
#[allow(dead_code)]
pub fn classify_aspect_ratio<F>(matrix: &ArrayView2<F>, threshold: f64) -> AspectRatio {
    let (rows, cols) = matrix.dim();
    let ratio = rows as f64 / cols as f64;
    match ratio {
        r if r > threshold => AspectRatio::TallSkinny,
        r if r < threshold.recip() => AspectRatio::ShortFat,
        _ => AspectRatio::Square,
    }
}
/// QR decomposition entry point for tall-and-skinny matrices (TSQR).
///
/// Returns `(Q, R)` with `A = Q·R`.
///
/// NOTE(review): a true communication-avoiding TSQR is not implemented yet —
/// both branches currently delegate to the standard QR. The aspect-ratio
/// classification is kept so a blocked TSQR can be dispatched here later
/// without changing callers. (Fix: removed the unused `_m_n` binding.)
#[allow(dead_code)]
pub fn tsqr<F>(
    matrix: &ArrayView2<F>,
    config: &ScalableConfig,
) -> LinalgResult<(Array2<F>, Array2<F>)>
where
    F: Float
        + NumAssign
        + Zero
        + One
        + Sum
        + scirs2_core::ndarray::ScalarOperand
        + Send
        + Sync
        + 'static,
{
    if classify_aspect_ratio(matrix, config.aspect_threshold) != AspectRatio::TallSkinny {
        // Not tall-and-skinny: the plain QR is the right tool anyway.
        return qr(&matrix.view(), None);
    }
    // Placeholder for a future blocked TSQR; currently identical to the above.
    qr(&matrix.view(), None)
}
/// Truncated SVD via randomized range finding (Halko–Martinsson–Tropp style).
///
/// Returns `(U, S, Vᵀ)` with `U: m×rank`, `S: rank`, `Vᵀ: rank×n`, where
/// `rank` is clamped to `min(m, n)`. `config.oversampling` extra columns are
/// sampled and `config.power_iterations` power iterations are applied before
/// truncation.
///
/// # Errors
/// Propagates any error from the underlying `qr`/`svd` routines.
#[allow(dead_code)]
pub fn randomized_svd<F>(
    matrix: &ArrayView2<F>,
    rank: usize,
    config: &ScalableConfig,
) -> LinalgResult<(Array2<F>, Array1<F>, Array2<F>)>
where
    F: Float
        + NumAssign
        + Zero
        + One
        + Sum
        + scirs2_core::ndarray::ScalarOperand
        + Send
        + Sync
        + 'static,
{
    let (m, n) = matrix.dim();
    // Clamp the requested rank; oversample but never beyond the matrix dims.
    let effective_rank = rank.min(m.min(n));
    let oversampled_rank = (effective_rank + config.oversampling).min(m).min(n);
    // Random test matrix with entries uniform in [-1, 1).
    // (Refactor: `from_shape_fn` replaces the mutable nested-loop fill.)
    let omega = Array2::from_shape_fn((n, oversampled_rank), |_| {
        F::from(scirs2_core::random::random::<f64>() * 2.0 - 1.0)
            .expect("a finite f64 in [-1, 1) must be representable in F")
    });
    // Sample the range of A.
    let mut y = matrix.dot(&omega);
    // Power iterations (apply Aᵀ·A repeatedly) sharpen the singular spectrum.
    // NOTE(review): no re-orthonormalization between iterations; acceptable for
    // the small iteration counts used here, but may lose accuracy for large q.
    for _ in 0..config.power_iterations {
        let aty = matrix.t().dot(&y);
        y = matrix.dot(&aty);
    }
    // Orthonormal basis Q for the sampled range; project: B = Qᵀ·A.
    let (q, _r) = qr(&y.view(), None)?;
    let b = q.t().dot(matrix);
    // SVD of the small projected matrix, lifted back via U = Q·Ũ.
    let (u_tilde, s, vt) = svd(&b.view(), false, None)?;
    let u = q.dot(&u_tilde);
    // Truncate the oversampled factors to the requested rank.
    let u_truncated = u
        .slice(scirs2_core::ndarray::s![.., ..effective_rank])
        .to_owned();
    let s_truncated = s
        .slice(scirs2_core::ndarray::s![..effective_rank])
        .to_owned();
    let vt_truncated = vt
        .slice(scirs2_core::ndarray::s![..effective_rank, ..])
        .to_owned();
    Ok((u_truncated, s_truncated, vt_truncated))
}
/// Compute an LQ decomposition `A = L·Q`, where `L` is `m×k` lower-trapezoidal
/// and `Q` is `k×n` with orthonormal rows, `k = min(m, n)`.
///
/// The LQ factors are obtained from the QR of the transpose:
/// `Aᵀ = Qₜ·Rₜ  ⇒  A = Rₜᵀ·Qₜᵀ = L·Q`.
///
/// BUG FIX: the previous non-short-fat branch returned `(R, Q)` from a plain
/// QR of `A`, but `R·Q ≠ Q·R = A`, so the returned pair was not a valid
/// factorization of `A`. The transpose route below is correct for every
/// aspect ratio, so the shape-based special case is removed.
#[allow(dead_code)]
pub fn lq_decomposition<F>(
    matrix: &ArrayView2<F>,
    config: &ScalableConfig,
) -> LinalgResult<(Array2<F>, Array2<F>)>
where
    F: Float
        + NumAssign
        + Zero
        + One
        + Sum
        + scirs2_core::ndarray::ScalarOperand
        + Send
        + Sync
        + 'static,
{
    // `config` is no longer consulted (classification removed with the bug fix);
    // kept in the signature for caller compatibility.
    let _ = config;
    let (m, n) = matrix.dim();
    let k = m.min(n);
    let matrix_t = matrix.t().to_owned();
    let (q_t, r_t) = qr(&matrix_t.view(), None)?;
    let l = r_t.t().to_owned();
    let q = q_t.t().to_owned();
    // Only the first k columns of L (= Rₜᵀ) are nonzero, and only the first k
    // rows of Q contribute to the product, so the thin factors reproduce A.
    let l_resized = l.slice(scirs2_core::ndarray::s![.., ..k]).to_owned();
    let q_resized = q.slice(scirs2_core::ndarray::s![..k, ..]).to_owned();
    Ok((l_resized, q_resized))
}
/// Decompose `matrix` with the algorithm best suited to its aspect ratio.
///
/// Tall-and-skinny inputs go through `tsqr`, short-and-fat through
/// `lq_decomposition`, and roughly square ones through plain `qr`. The result
/// bundles the two factors with the algorithm name and cost estimates.
///
/// (Fix: removed dead timing code — an `Instant` was started and its elapsed
/// value immediately discarded without ever reaching the result.)
///
/// # Errors
/// Propagates any error from the underlying decomposition routine.
#[allow(dead_code)]
pub fn adaptive_decomposition<F>(
    matrix: &ArrayView2<F>,
    config: &ScalableConfig,
) -> LinalgResult<AdaptiveResult<F>>
where
    F: Float
        + NumAssign
        + Zero
        + One
        + Sum
        + scirs2_core::ndarray::ScalarOperand
        + Send
        + Sync
        + 'static,
{
    let (m, n) = matrix.dim();
    let aspect = classify_aspect_ratio(matrix, config.aspect_threshold);
    // Dispatch on shape; each arm yields (factor1, factor2, name, flop estimate).
    let (factor1, factor2, algorithm_used, complexity_estimate) = match aspect {
        AspectRatio::TallSkinny => {
            let (q, r) = tsqr(matrix, config)?;
            (
                q,
                r,
                "Tall-and-Skinny QR (TSQR)".to_string(),
                estimate_tsqr_complexity(m, n),
            )
        }
        AspectRatio::ShortFat => {
            let (l, q) = lq_decomposition(matrix, config)?;
            (
                l,
                q,
                "LQ Decomposition".to_string(),
                estimate_lq_complexity(m, n),
            )
        }
        AspectRatio::Square => {
            let (q, r) = qr(&matrix.view(), None)?;
            (
                q,
                r,
                "Standard QR Decomposition".to_string(),
                estimate_qr_complexity(m, n),
            )
        }
    };
    let memory_estimate = estimate_memory_usage(m, n, &aspect);
    let performance_metrics = calculate_performance_metrics(m, n, &aspect, complexity_estimate);
    Ok(AdaptiveResult {
        factor1,
        factor2,
        aspect_ratio: aspect,
        algorithm_used,
        complexity_estimate,
        memory_estimate,
        performance_metrics,
    })
}
/// Output of `adaptive_decomposition`: the two factors plus bookkeeping about
/// which algorithm ran and its estimated cost.
#[derive(Debug, Clone)]
pub struct AdaptiveResult<F> {
/// First factor (Q for QR/TSQR paths, L for the LQ path).
pub factor1: Array2<F>,
/// Second factor (R for QR/TSQR paths, Q for the LQ path).
pub factor2: Array2<F>,
/// Shape class the input was assigned.
pub aspect_ratio: AspectRatio,
/// Human-readable name of the algorithm that produced the factors.
pub algorithm_used: String,
/// Estimated flop count of the chosen algorithm.
pub complexity_estimate: usize,
/// Estimated memory footprint in bytes (assumes f64 elements).
pub memory_estimate: usize,
/// Heuristic performance indicators for the chosen algorithm.
pub performance_metrics: PerformanceMetrics,
}
/// Heuristic performance indicators attached to an `AdaptiveResult`.
#[derive(Debug, Clone)]
pub struct PerformanceMetrics {
/// Estimated floating-point operation count.
pub flop_count: usize,
/// Estimated number of elements communicated (per-algorithm heuristic).
pub communication_volume: usize,
/// Rough bandwidth figure derived from matrix size in
/// `calculate_performance_metrics`; units unspecified — TODO confirm intent.
pub memory_bandwidth: f64,
/// Fixed per-algorithm cache-efficiency heuristic in [0, 1].
pub cache_efficiency: f64,
}
impl Default for PerformanceMetrics {
// NOTE(review): identical to what `#[derive(Default)]` would generate; the
// manual impl could be replaced by a derive on the struct definition.
fn default() -> Self {
Self {
flop_count: 0,
communication_volume: 0,
memory_bandwidth: 0.0,
cache_efficiency: 0.0,
}
}
}
/// Cache-blocked matrix multiplication `C = A·B` using `config.blocksize`
/// square tiles. Small products (fewer than `blocksize³` scalar multiplies)
/// fall through to `ndarray`'s `dot`, where blocking overhead is not worth it.
///
/// (Perf fix: `i_end`/`j_end` are hoisted out of the innermost `bk` loop —
/// they depend only on `bi`/`bj` respectively.)
///
/// # Errors
/// Returns `LinalgError::ShapeError` when the inner dimensions disagree.
///
/// # Panics
/// Panics if `config.blocksize` is 0 (via `step_by(0)`) — same as before;
/// NOTE(review): consider validating blocksize upstream.
#[allow(dead_code)]
pub fn blocked_matmul<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    config: &ScalableConfig,
) -> LinalgResult<Array2<F>>
where
    F: Float
        + NumAssign
        + Zero
        + One
        + Sum
        + scirs2_core::ndarray::ScalarOperand
        + Send
        + Sync
        + 'static,
{
    let (m, k) = a.dim();
    let (k2, n) = b.dim();
    if k != k2 {
        return Err(LinalgError::ShapeError(format!(
            "Matrix dimensions incompatible for multiplication: {m}x{k} * {k2}x{n}"
        )));
    }
    if m * n * k < config.blocksize * config.blocksize * config.blocksize {
        return Ok(a.dot(b));
    }
    let blocksize = config.blocksize;
    let mut c = Array2::zeros((m, n));
    for bi in (0..m).step_by(blocksize) {
        let i_end = (bi + blocksize).min(m); // invariant w.r.t. bj, bk
        for bj in (0..n).step_by(blocksize) {
            let j_end = (bj + blocksize).min(n); // invariant w.r.t. bk
            for bk in (0..k).step_by(blocksize) {
                let k_end = (bk + blocksize).min(k);
                let a_block = a.slice(scirs2_core::ndarray::s![bi..i_end, bk..k_end]);
                let b_block = b.slice(scirs2_core::ndarray::s![bk..k_end, bj..j_end]);
                // Accumulate the tile product into the output tile.
                let c_block = a_block.dot(&b_block);
                let mut c_slice = c.slice_mut(scirs2_core::ndarray::s![bi..i_end, bj..j_end]);
                c_slice += &c_block;
            }
        }
    }
    Ok(c)
}
/// Estimated flop count for a TSQR of an m×n matrix: 2·m·n² + n³/3
/// (integer division on the cubic term, as before).
#[allow(dead_code)]
fn estimate_tsqr_complexity(m: usize, n: usize) -> usize {
    let n_sq = n * n;
    2 * m * n_sq + n_sq * n / 3
}
/// Estimated flop count for an LQ of an m×n matrix: 2·m²·n − (2/3)·m³.
///
/// Fix: `saturating_sub` prevents a usize-underflow panic when the subtrahend
/// exceeds the product (n < m/3, e.g. a tall input); the estimate then
/// saturates at 0 instead of aborting in debug builds.
#[allow(dead_code)]
fn estimate_lq_complexity(m: usize, n: usize) -> usize {
    (2 * m * m * n).saturating_sub(2 * m * m * m / 3)
}
/// Estimated flop count for a QR of an m×n matrix: 2·m·n² − (2/3)·n³.
///
/// Fix: `saturating_sub` prevents a usize-underflow panic when the subtrahend
/// exceeds the product (m < n/3, e.g. a wide input); the estimate then
/// saturates at 0 instead of aborting in debug builds.
#[allow(dead_code)]
fn estimate_qr_complexity(m: usize, n: usize) -> usize {
    (2 * m * n * n).saturating_sub(2 * n * n * n / 3)
}
/// Rough working-set size in bytes for the decomposition chosen for an m×n
/// matrix (input + two factors), assuming f64 elements.
#[allow(dead_code)]
fn estimate_memory_usage(m: usize, n: usize, aspect_ratio: &AspectRatio) -> usize {
    let bytes_per_elem = std::mem::size_of::<f64>();
    // Element counts: input (m·n) plus the two factors of the chosen path.
    let elements = match aspect_ratio {
        AspectRatio::TallSkinny => 2 * m * n + n * n,
        // Short-fat and square paths share the same estimate.
        AspectRatio::ShortFat | AspectRatio::Square => 2 * m * n + m * m,
    };
    elements * bytes_per_elem
}
/// Build heuristic performance indicators for the algorithm selected for an
/// m×n matrix; `complexity` is passed through as the flop count.
#[allow(dead_code)]
fn calculate_performance_metrics(
    m: usize,
    n: usize,
    aspect_ratio: &AspectRatio,
    complexity: usize,
) -> PerformanceMetrics {
    // Per-variant heuristics: (communication volume, cache efficiency).
    let (communication_volume, cache_efficiency) = match aspect_ratio {
        AspectRatio::TallSkinny => (n * n, 0.8),
        AspectRatio::ShortFat => (m * n, 0.6),
        AspectRatio::Square => (m * n, 0.7),
    };
    PerformanceMetrics {
        flop_count: complexity,
        communication_volume,
        memory_bandwidth: (m * n) as f64 * 0.1,
        cache_efficiency,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use scirs2_core::ndarray::array;
// Ratios of 100, 0.01 and 1.0 against a threshold of 4.0 must map to the
// three distinct AspectRatio variants.
#[test]
fn test_aspect_ratio_classification() {
let tallmatrix = Array2::<f64>::zeros((1000, 10));
let shortmatrix = Array2::<f64>::zeros((10, 1000));
let squarematrix = Array2::<f64>::zeros((100, 100));
assert_eq!(
classify_aspect_ratio(&tallmatrix.view(), 4.0),
AspectRatio::TallSkinny
);
assert_eq!(
classify_aspect_ratio(&shortmatrix.view(), 4.0),
AspectRatio::ShortFat
);
assert_eq!(
classify_aspect_ratio(&squarematrix.view(), 4.0),
AspectRatio::Square
);
}
// tsqr on a 5x2 matrix: Q must have orthonormal columns (QᵀQ = I) and
// Q·R must reconstruct the input.
#[test]
fn test_tsqr_smallmatrix() {
let matrix = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]];
let config = ScalableConfig::default().with_blocksize(2);
let (q, r) = tsqr(&matrix.view(), &config).expect("Operation failed");
let qtq = q.t().dot(&q);
let identity = Array2::eye(2);
for i in 0..2 {
for j in 0..2 {
assert_relative_eq!(qtq[[i, j]], identity[[i, j]], epsilon = 1e-10);
}
}
let reconstructed = q.dot(&r);
for i in 0..5 {
for j in 0..2 {
assert_relative_eq!(reconstructed[[i, j]], matrix[[i, j]], epsilon = 1e-10);
}
}
}
// LQ of a 1x8 short-fat matrix: L·Q must reconstruct the input and Q's
// rows must be orthonormal (QQᵀ = I).
#[test]
fn test_lq_decomposition() {
let matrix = array![[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]];
let config = ScalableConfig::default();
let (l, q) = lq_decomposition(&matrix.view(), &config).expect("Operation failed");
let reconstructed = l.dot(&q);
for i in 0..1 {
for j in 0..8 {
assert_relative_eq!(reconstructed[[i, j]], matrix[[i, j]], epsilon = 1e-10);
}
}
let qqt = q.dot(&q.t());
let identity = Array2::eye(1);
for i in 0..1 {
for j in 0..1 {
assert_relative_eq!(qqt[[i, j]], identity[[i, j]], epsilon = 1e-10);
}
}
}
// A 100x5 matrix (ratio 20 > 4) must take the TSQR path; the result's
// factor shapes and bookkeeping fields are sanity-checked.
#[test]
fn test_adaptive_decomposition() {
let tallmatrix = Array2::from_shape_fn((100, 5), |(i, j)| (i + j) as f64);
let config = ScalableConfig::default();
let result = adaptive_decomposition(&tallmatrix.view(), &config).expect("Operation failed");
assert_eq!(result.aspect_ratio, AspectRatio::TallSkinny);
assert!(result.algorithm_used.contains("QR"));
// Full QR: Q is 100x100, R is 100x5.
assert_eq!(result.factor1.dim(), (100, 100)); assert_eq!(result.factor2.dim(), (100, 5));
assert!(result.complexity_estimate > 0);
assert!(result.memory_estimate > 0);
assert!(result.performance_metrics.flop_count > 0);
assert!(result.performance_metrics.cache_efficiency > 0.0);
}
// Builds a rank-2 matrix plus small noise; the rank-2 randomized SVD must
// return correctly-shaped, sorted singular values and reconstruct the
// input to within the noise level.
#[test]
fn test_randomized_svd() {
let m = 6;
let n = 4;
let true_rank = 2;
let mut matrix = Array2::zeros((m, n));
for i in 0..m {
for j in 0..n {
matrix[[i, j]] = 3.0 * (i as f64 / m as f64) * (j as f64 / n as f64);
matrix[[i, j]] += 2.0 * ((i + 1) as f64 / m as f64) * ((n - j) as f64 / n as f64);
}
}
// Small random perturbation so the matrix is only approximately rank 2.
for i in 0..m {
for j in 0..n {
matrix[[i, j]] += 0.01 * scirs2_core::random::random::<f64>();
}
}
let config = ScalableConfig::default()
.with_oversampling(2)
.with_power_iterations(1);
let result = randomized_svd(&matrix.view(), true_rank, &config);
assert!(result.is_ok(), "Randomized SVD failed: {:?}", result.err());
let (u_approx, s_approx, vt_approx) = result.expect("Operation failed");
assert_eq!(u_approx.dim(), (m, true_rank));
assert_eq!(s_approx.dim(), true_rank);
assert_eq!(vt_approx.dim(), (true_rank, n));
// Singular values must be positive and non-increasing.
assert!(s_approx[0] > 0.0);
for i in 0..s_approx.len() - 1 {
assert!(s_approx[i] >= s_approx[i + 1]);
}
let reconstructed = u_approx.dot(&Array2::from_diag(&s_approx)).dot(&vt_approx);
let error = (&matrix - &reconstructed).mapv(|x| x.abs()).sum();
assert!(
error / ((m * n) as f64) < 0.1,
"Reconstruction error too large: {}",
error
);
}
// Blocked multiplication (blocksize 10, so multiple partial tiles) must
// match ndarray's dot elementwise.
#[test]
fn test_blocked_matmul() {
let a = Array2::from_shape_fn((20, 30), |(i, j)| (i + j) as f64);
let b = Array2::from_shape_fn((30, 25), |(i, j)| (i * j) as f64);
let config = ScalableConfig::default().with_blocksize(10);
let result_blocked =
blocked_matmul(&a.view(), &b.view(), &config).expect("Operation failed");
let result_standard = a.dot(&b);
for i in 0..20 {
for j in 0..25 {
assert_relative_eq!(
result_blocked[[i, j]],
result_standard[[i, j]],
epsilon = 1e-12
);
}
}
}
}