use scirs2_core::gpu::{GpuBackend, GpuContext, GpuError};
use scirs2_core::ndarray::{Array1, Array2, ArrayView2, ScalarOperand};
use scirs2_core::numeric::{Float, NumAssign, One, Zero};
use std::iter::Sum;
use crate::error::{LinalgError, LinalgResult};
use super::types::{CholeskyFactors, LuFactors, QrFactors, SvdFactors};
/// Minimum matrix dimension at which the GPU code paths are attempted.
/// Below this size the host<->device transfer overhead dominates, so the
/// public entry points fall straight through to the CPU implementations.
const GPU_DECOMP_THRESHOLD: usize = 32;
/// LU decomposition with partial pivoting, preferring the GPU path for
/// matrices at or above [`GPU_DECOMP_THRESHOLD`] rows and falling back to the
/// CPU implementation otherwise (or whenever the GPU path errors).
pub fn gpu_lu<F>(ctx: Option<&GpuContext>, a: &ArrayView2<F>) -> LinalgResult<LuFactors<F>>
where
    F: Float + NumAssign + One + Sum + Send + Sync + ScalarOperand + 'static,
{
    validate_matrix(a, "LU decomposition")?;
    if a.nrows() >= GPU_DECOMP_THRESHOLD {
        if let Some(gpu_ctx) = ctx {
            // Any GPU failure is swallowed; the CPU path below is the fallback.
            if let Ok(factors) = gpu_lu_impl(gpu_ctx, a) {
                return Ok(factors);
            }
        }
    }
    cpu_lu(a)
}
/// GPU-accelerated right-looking blocked LU decomposition with partial
/// pivoting.
///
/// Each panel of columns is factored on the CPU; the Schur-complement update
/// of the trailing submatrix (`A22 -= L21 * U12`) is offloaded to the GPU via
/// GEMM, with an equivalent CPU loop as the fallback when the GPU call fails.
///
/// Fix over the previous version: after factoring a panel, the block row
/// `U12 = L11^{-1} * A12` is now computed via forward substitution before the
/// trailing update. The old code used the raw (only row-swapped) `A12` in the
/// GEMM and in the final U extraction, producing an incorrect factorization
/// whenever the matrix had more columns than one block (> 32). The unused
/// upload of the whole matrix to the GPU was also removed.
fn gpu_lu_impl<F>(ctx: &GpuContext, a: &ArrayView2<F>) -> Result<LuFactors<F>, LinalgError>
where
    F: Float + NumAssign + One + Sum + Send + Sync + ScalarOperand + 'static,
{
    let n = a.nrows();
    let m = a.ncols();
    let mut work = a.to_owned();
    let min_dim = n.min(m);
    // piv[i] records which original row currently occupies row i.
    let mut piv: Vec<usize> = (0..n).collect();
    let block_size = 32.min(min_dim);
    let mut jb = 0;
    while jb < min_dim {
        let jb_end = (jb + block_size).min(min_dim);
        let current_block = jb_end - jb;
        // Unblocked panel factorization of columns jb..jb_end.
        for j in jb..jb_end {
            // Partial pivoting: largest magnitude entry on/below the diagonal.
            let mut max_val = F::zero();
            let mut max_row = j;
            for i in j..n {
                let val = work[[i, j]].abs();
                if val > max_val {
                    max_val = val;
                    max_row = i;
                }
            }
            if max_row != j {
                // Swap the full rows so earlier and later columns stay consistent.
                for col in 0..m {
                    let tmp = work[[j, col]];
                    work[[j, col]] = work[[max_row, col]];
                    work[[max_row, col]] = tmp;
                }
                piv.swap(j, max_row);
            }
            let pivot = work[[j, j]];
            // An exactly-zero pivot means the whole column below is zero; the
            // multipliers are left at zero and the factorization continues.
            if pivot.abs() > F::zero() {
                for i in (j + 1)..n {
                    work[[i, j]] /= pivot;
                }
            }
            // Eliminate within the panel only; trailing columns are updated in
            // bulk after the panel is fully factored.
            for i in (j + 1)..n {
                let factor = work[[i, j]];
                for k in (j + 1)..jb_end {
                    let rhs = factor * work[[j, k]];
                    work[[i, k]] -= rhs;
                }
            }
        }
        if jb_end < m {
            // Triangular solve U12 = L11^{-1} * A12 (unit lower triangular L11),
            // which the panel elimination above deliberately skipped.
            for kk in jb..jb_end {
                for ii in (kk + 1)..jb_end {
                    let factor = work[[ii, kk]];
                    for cc in jb_end..m {
                        let sub = factor * work[[kk, cc]];
                        work[[ii, cc]] -= sub;
                    }
                }
            }
            let trail_rows = n - jb;
            let trail_cols = m - jb_end;
            if trail_rows > 0 && trail_cols > 0 && current_block > 0 {
                let l_rows = n - jb_end;
                if l_rows > 0 {
                    // Flatten L21 (l_rows x current_block) for the GPU GEMM.
                    let mut l_panel = vec![0.0_f64; l_rows * current_block];
                    for i in 0..l_rows {
                        for j in 0..current_block {
                            l_panel[i * current_block + j] =
                                work[[jb_end + i, jb + j]].to_f64().unwrap_or(0.0);
                        }
                    }
                    // Flatten U12 (current_block x trail_cols).
                    let mut u_top = vec![0.0_f64; current_block * trail_cols];
                    for i in 0..current_block {
                        for j in 0..trail_cols {
                            u_top[i * trail_cols + j] =
                                work[[jb + i, jb_end + j]].to_f64().unwrap_or(0.0);
                        }
                    }
                    let gpu_l = ctx.create_buffer_from_slice(&l_panel);
                    let gpu_u = ctx.create_buffer_from_slice(&u_top);
                    // Schur complement: A22 -= L21 * U12.
                    match ctx.gemm(&gpu_l, &gpu_u, l_rows, current_block, trail_cols) {
                        Ok(gpu_c) => {
                            let c_flat = gpu_c.to_vec();
                            for i in 0..l_rows {
                                for j in 0..trail_cols {
                                    let val =
                                        F::from(c_flat[i * trail_cols + j]).unwrap_or_else(F::zero);
                                    work[[jb_end + i, jb_end + j]] -= val;
                                }
                            }
                        }
                        Err(_) => {
                            // CPU fallback computing the same update.
                            for i in 0..l_rows {
                                for j in 0..trail_cols {
                                    let mut sum = F::zero();
                                    for p in 0..current_block {
                                        sum +=
                                            work[[jb_end + i, jb + p]] * work[[jb + p, jb_end + j]];
                                    }
                                    work[[jb_end + i, jb_end + j]] -= sum;
                                }
                            }
                        }
                    }
                }
            }
        }
        jb = jb_end;
    }
    extract_plu(&work, &piv, n, m)
}
/// CPU fallback: delegate to the crate-level LU routine and repackage the
/// resulting (P, L, U) triple into [`LuFactors`].
fn cpu_lu<F>(a: &ArrayView2<F>) -> LinalgResult<LuFactors<F>>
where
    F: Float + NumAssign + One + Sum + Send + Sync + ScalarOperand + 'static,
{
    crate::lu(a, None).map(|(p, l, u)| LuFactors { p, l, u })
}
/// Unpack the in-place factorization buffer into explicit P, L, and U.
///
/// `work` holds L strictly below the diagonal and U on/above it; `piv` is the
/// row-permutation vector accumulated during pivoting. The factors satisfy
/// `P * A = L * U`, with L sized (n x min(n, m)) and U sized (min(n, m) x m).
fn extract_plu<F>(
    work: &Array2<F>,
    piv: &[usize],
    n: usize,
    m: usize,
) -> Result<LuFactors<F>, LinalgError>
where
    F: Float + NumAssign + One + Zero,
{
    let rank_dim = n.min(m);
    // Permutation matrix: row `row` carries a 1 in the column named by piv.
    let mut p = Array2::<F>::zeros((n, n));
    for (row, &col) in piv.iter().enumerate() {
        if col < n {
            p[[row, col]] = F::one();
        }
    }
    // L: strictly-lower part of the buffer plus an implicit unit diagonal.
    let mut l = Array2::<F>::zeros((n, rank_dim));
    for row in 0..n {
        for col in 0..row.min(rank_dim) {
            l[[row, col]] = work[[row, col]];
        }
        if row < rank_dim {
            l[[row, row]] = F::one();
        }
    }
    // U: the diagonal and everything above it.
    let mut u = Array2::<F>::zeros((rank_dim, m));
    for row in 0..rank_dim {
        for col in row..m {
            u[[row, col]] = work[[row, col]];
        }
    }
    Ok(LuFactors { p, l, u })
}
/// QR decomposition, preferring the GPU path when both dimensions reach
/// [`GPU_DECOMP_THRESHOLD`] and falling back to the CPU implementation
/// otherwise (or whenever the GPU path errors).
pub fn gpu_qr<F>(ctx: Option<&GpuContext>, a: &ArrayView2<F>) -> LinalgResult<QrFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    validate_matrix(a, "QR decomposition")?;
    let (rows, cols) = a.dim();
    if rows >= GPU_DECOMP_THRESHOLD && cols >= GPU_DECOMP_THRESHOLD {
        if let Some(gpu_ctx) = ctx {
            // GPU errors are swallowed; the CPU path below is the fallback.
            if let Ok(factors) = gpu_qr_impl(gpu_ctx, a) {
                return Ok(factors);
            }
        }
    }
    cpu_qr(a)
}
/// Householder QR decomposition with GPU-assisted trailing updates.
///
/// Reduces `a` to upper-triangular form one Householder reflector per column,
/// accumulating Q explicitly. When the trailing block is large enough, the
/// rank-1 outer-product update is offloaded to the GPU; any GPU failure falls
/// back to the mathematically identical CPU loop.
///
/// Returns economy-size factors: `q` is (m x min(m, n)), `r` is (min(m, n) x n).
fn gpu_qr_impl<F>(ctx: &GpuContext, a: &ArrayView2<F>) -> Result<QrFactors<F>, LinalgError>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    let (m, n) = a.dim();
    let min_dim = m.min(n);
    let mut r = a.to_owned();
    // Q starts as the identity and accumulates reflectors from the right.
    let mut q = Array2::<F>::zeros((m, m));
    for i in 0..m {
        q[[i, i]] = F::one();
    }
    for k in 0..min_dim {
        // Norm of the column segment from the diagonal downward.
        let mut norm_sq = F::zero();
        for i in k..m {
            norm_sq += r[[i, k]] * r[[i, k]];
        }
        let norm = norm_sq.sqrt();
        if norm < F::epsilon() {
            // Column is numerically zero; no reflector needed.
            continue;
        }
        // Sign chosen to avoid cancellation when forming v[0].
        let sign = if r[[k, k]] >= F::zero() {
            F::one()
        } else {
            -F::one()
        };
        // Householder vector v for the reflector H = I - tau * v * v^T.
        let mut v = vec![F::zero(); m - k];
        v[0] = r[[k, k]] + sign * norm;
        for i in 1..(m - k) {
            v[i] = r[[k + i, k]];
        }
        let mut v_norm_sq = F::zero();
        for vi in &v {
            v_norm_sq += *vi * *vi;
        }
        if v_norm_sq < F::epsilon() {
            continue;
        }
        let tau = F::from(2.0).unwrap_or_else(F::one) / v_norm_sq;
        let trail_cols = n - k;
        let trail_rows = m - k;
        if trail_rows >= GPU_DECOMP_THRESHOLD && trail_cols >= GPU_DECOMP_THRESHOLD {
            // vt_r = v^T * R[k.., k..], the row vector shared by every update.
            let mut vt_r = vec![F::zero(); trail_cols];
            for j in 0..trail_cols {
                let mut sum = F::zero();
                for i in 0..trail_rows {
                    sum += v[i] * r[[k + i, k + j]];
                }
                vt_r[j] = sum;
            }
            // Offload the outer product v * vt_r to the GPU as f64 buffers.
            let v_f64: Vec<f64> = v.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
            let vtr_f64: Vec<f64> = vt_r.iter().map(|x| x.to_f64().unwrap_or(0.0)).collect();
            let gpu_v = ctx.create_buffer_from_slice(&v_f64);
            let gpu_vtr = ctx.create_buffer_from_slice(&vtr_f64);
            match ctx.gemm(&gpu_v, &gpu_vtr, trail_rows, 1, trail_cols) {
                Ok(gpu_outer) => {
                    let outer_flat = gpu_outer.to_vec();
                    let tau_f64 = tau.to_f64().unwrap_or(2.0);
                    // R[k.., k..] -= tau * v * vt_r
                    for i in 0..trail_rows {
                        for j in 0..trail_cols {
                            let val = F::from(tau_f64 * outer_flat[i * trail_cols + j])
                                .unwrap_or_else(F::zero);
                            r[[k + i, k + j]] -= val;
                        }
                    }
                }
                Err(_) => {
                    // CPU fallback: identical rank-1 update.
                    for i in 0..trail_rows {
                        for j in 0..trail_cols {
                            r[[k + i, k + j]] -= tau * v[i] * vt_r[j];
                        }
                    }
                }
            }
        } else {
            // Small trailing block: apply the reflector entirely on the CPU.
            let mut vt_r = vec![F::zero(); trail_cols];
            for j in 0..trail_cols {
                let mut sum = F::zero();
                for i in 0..trail_rows {
                    sum += v[i] * r[[k + i, k + j]];
                }
                vt_r[j] = sum;
            }
            for i in 0..trail_rows {
                for j in 0..trail_cols {
                    r[[k + i, k + j]] -= tau * v[i] * vt_r[j];
                }
            }
        }
        // Accumulate the reflector into Q from the right:
        // Q -= tau * (Q * v) * v^T.
        let mut q_v = vec![F::zero(); m];
        for i in 0..m {
            let mut sum = F::zero();
            for j in 0..trail_rows {
                sum += q[[i, k + j]] * v[j];
            }
            q_v[i] = sum;
        }
        for i in 0..m {
            for j in 0..trail_rows {
                q[[i, k + j]] -= tau * q_v[i] * v[j];
            }
        }
    }
    // Trim to economy size: R keeps the top min_dim rows, Q the left min_dim columns.
    let r_trimmed = r.slice(scirs2_core::ndarray::s![0..min_dim, ..]).to_owned();
    let q_trimmed = q.slice(scirs2_core::ndarray::s![.., 0..min_dim]).to_owned();
    Ok(QrFactors {
        q: q_trimmed,
        r: r_trimmed,
    })
}
/// CPU fallback: delegate to the crate-level QR routine and repackage the
/// resulting (Q, R) pair into [`QrFactors`].
fn cpu_qr<F>(a: &ArrayView2<F>) -> LinalgResult<QrFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    crate::qr(a, None).map(|(q, r)| QrFactors { q, r })
}
/// Cholesky factorization (`A = L * L^T`) of a symmetric positive-definite
/// matrix, preferring the GPU-blocked path for matrices at or above
/// [`GPU_DECOMP_THRESHOLD`] and falling back to the CPU implementation
/// otherwise (or whenever the GPU path errors).
pub fn gpu_cholesky<F>(
    ctx: Option<&GpuContext>,
    a: &ArrayView2<F>,
) -> LinalgResult<CholeskyFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    validate_matrix(a, "Cholesky decomposition")?;
    // Cholesky is only defined for square matrices.
    if a.nrows() != a.ncols() {
        return Err(LinalgError::ShapeError(
            "Cholesky decomposition requires a square matrix".to_string(),
        ));
    }
    if a.nrows() >= GPU_DECOMP_THRESHOLD {
        if let Some(gpu_ctx) = ctx {
            // GPU errors are swallowed; the CPU path below is the fallback.
            if let Ok(factors) = gpu_cholesky_impl(gpu_ctx, a) {
                return Ok(factors);
            }
        }
    }
    cpu_cholesky(a)
}
/// GPU-accelerated right-looking blocked Cholesky factorization (A = L * L^T).
///
/// Factors one diagonal block at a time on the CPU, then updates the trailing
/// submatrix with the symmetric rank-k update `A22 -= L21 * L21^T`, offloading
/// that product to the GPU and falling back to an equivalent CPU loop if the
/// GPU call fails. Only the lower triangle of `l` is ever written.
fn gpu_cholesky_impl<F>(
    ctx: &GpuContext,
    a: &ArrayView2<F>,
) -> Result<CholeskyFactors<F>, LinalgError>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    let n = a.nrows();
    // Copy the lower triangle of the input; the factor overwrites it in place.
    let mut l = Array2::<F>::zeros((n, n));
    for i in 0..n {
        for j in 0..=i {
            l[[i, j]] = a[[i, j]];
        }
    }
    let block_size = 32.min(n);
    let mut jb = 0;
    while jb < n {
        let jb_end = (jb + block_size).min(n);
        let current_block = jb_end - jb;
        // Unblocked factorization of the diagonal block. Contributions from
        // earlier block columns were already subtracted by previous trailing
        // updates, so the inner sums only run over k in jb..j.
        for j in jb..jb_end {
            let mut sum = l[[j, j]];
            for k in jb..j {
                sum -= l[[j, k]] * l[[j, k]];
            }
            if sum <= F::zero() {
                // A non-positive diagonal entry means the input is not SPD.
                return Err(LinalgError::NonPositiveDefiniteError(format!(
                    "Matrix is not positive definite at index {}",
                    j
                )));
            }
            l[[j, j]] = sum.sqrt();
            let diag = l[[j, j]];
            // Column j entries that lie inside the diagonal block.
            for i in (j + 1)..jb_end {
                let mut sum = l[[i, j]];
                for k in jb..j {
                    sum -= l[[i, k]] * l[[j, k]];
                }
                l[[i, j]] = sum / diag;
            }
        }
        if jb_end < n {
            let trail_rows = n - jb_end;
            // Panel solve: rows below the block, columns of the block (L21).
            for j in jb..jb_end {
                let diag = l[[j, j]];
                for i in jb_end..n {
                    let mut sum = l[[i, j]];
                    for k in jb..j {
                        sum -= l[[i, k]] * l[[j, k]];
                    }
                    l[[i, j]] = sum / diag;
                }
            }
            // Flatten L21 (trail_rows x current_block) as f64 for the GPU.
            let mut l_panel_flat = vec![0.0_f64; trail_rows * current_block];
            for i in 0..trail_rows {
                for j in 0..current_block {
                    l_panel_flat[i * current_block + j] =
                        l[[jb_end + i, jb + j]].to_f64().unwrap_or(0.0);
                }
            }
            let gpu_l_panel = ctx.create_buffer_from_slice(&l_panel_flat);
            // Symmetric rank-k update of the trailing submatrix:
            // A22 -= L21 * L21^T (lower triangle only).
            match ctx.gemm_transpose_b(
                &gpu_l_panel,
                &gpu_l_panel,
                trail_rows,
                current_block,
                trail_rows,
            ) {
                Ok(gpu_update) => {
                    let update_flat = gpu_update.to_vec();
                    for i in 0..trail_rows {
                        for j in 0..=i {
                            let val =
                                F::from(update_flat[i * trail_rows + j]).unwrap_or_else(F::zero);
                            l[[jb_end + i, jb_end + j]] -= val;
                        }
                    }
                }
                Err(_) => {
                    // CPU fallback computing the same rank-k update.
                    for i in 0..trail_rows {
                        for j in 0..=i {
                            let mut sum = F::zero();
                            for p in 0..current_block {
                                sum += l[[jb_end + i, jb + p]] * l[[jb_end + j, jb + p]];
                            }
                            l[[jb_end + i, jb_end + j]] -= sum;
                        }
                    }
                }
            }
        }
        jb = jb_end;
    }
    Ok(CholeskyFactors { l })
}
/// CPU fallback: delegate to the crate-level Cholesky routine and wrap the
/// factor in [`CholeskyFactors`].
fn cpu_cholesky<F>(a: &ArrayView2<F>) -> LinalgResult<CholeskyFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    crate::cholesky(a, None).map(|l| CholeskyFactors { l })
}
/// Singular value decomposition, preferring the GPU path when both dimensions
/// reach [`GPU_DECOMP_THRESHOLD`] and falling back to the CPU implementation
/// otherwise (or whenever the GPU path errors).
pub fn gpu_svd<F>(ctx: Option<&GpuContext>, a: &ArrayView2<F>) -> LinalgResult<SvdFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    validate_matrix(a, "SVD")?;
    let (rows, cols) = a.dim();
    if rows >= GPU_DECOMP_THRESHOLD && cols >= GPU_DECOMP_THRESHOLD {
        if let Some(gpu_ctx) = ctx {
            // GPU errors are swallowed; the CPU path below is the fallback.
            if let Ok(factors) = gpu_svd_impl(gpu_ctx, a) {
                return Ok(factors);
            }
        }
    }
    cpu_svd(a)
}
/// GPU-assisted SVD via the eigendecomposition of the Gram matrix.
///
/// Builds the smaller of `A^T*A` (n x n, when m >= n) or `A*A^T` (m x m) on
/// the GPU, diagonalizes it with the Jacobi eigensolver to obtain singular
/// values (square roots of eigenvalues) and one set of singular vectors, then
/// recovers the other set with one more GPU GEMM (CPU fallback on failure).
///
/// NOTE(review): forming the Gram matrix squares the condition number, so
/// small singular values lose accuracy relative to a direct bidiagonal SVD.
///
/// Change over the previous version: a `vt_flat`/`gpu_vt` buffer in the
/// non-transposed branch was allocated and uploaded but never used; that dead
/// upload has been removed. The descending sort comparator is also written
/// directly instead of via swapped closure arguments.
fn gpu_svd_impl<F>(ctx: &GpuContext, a: &ArrayView2<F>) -> Result<SvdFactors<F>, LinalgError>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    let (m, n) = a.dim();
    // Work with the smaller Gram matrix to cut the eigensolver cost.
    let transpose = m < n;
    let a_flat: Vec<f64> = a.iter().map(|v| v.to_f64().unwrap_or(0.0)).collect();
    let gpu_a = ctx.create_buffer_from_slice(&a_flat);
    let (gram_size, gram_data) = if !transpose {
        let gpu_ata = ctx
            .gemm_transpose_a(&gpu_a, &gpu_a, n, m, n)
            .map_err(|e| LinalgError::ComputationError(format!("GPU A^T*A failed: {}", e)))?;
        (n, gpu_ata.to_vec())
    } else {
        let gpu_aat = ctx
            .gemm_transpose_b(&gpu_a, &gpu_a, m, n, m)
            .map_err(|e| LinalgError::ComputationError(format!("GPU A*A^T failed: {}", e)))?;
        (m, gpu_aat.to_vec())
    };
    let gram_f: Vec<F> = gram_data
        .iter()
        .map(|&v| F::from(v).unwrap_or_else(F::zero))
        .collect();
    let gram = Array2::from_shape_vec((gram_size, gram_size), gram_f)
        .map_err(|e| LinalgError::ShapeError(format!("Failed to reshape Gram matrix: {}", e)))?;
    // Eigenvalues are squared singular values; eigenvectors are one side's
    // singular vectors.
    let (eigenvalues, eigenvectors) = symmetric_eigen(&gram)?;
    let k = m.min(n);
    // Order eigenpairs by descending eigenvalue magnitude.
    let mut indices: Vec<usize> = (0..eigenvalues.len()).collect();
    indices.sort_by(|&ia, &ib| {
        eigenvalues[ib]
            .abs()
            .partial_cmp(&eigenvalues[ia].abs())
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let mut s = Array1::<F>::zeros(k);
    for i in 0..k {
        let ev = eigenvalues[indices[i]];
        // Clamp tiny negative eigenvalues (numerical noise) to zero.
        s[i] = if ev > F::zero() { ev.sqrt() } else { F::zero() };
    }
    if !transpose {
        // Eigenvectors of A^T*A are the right singular vectors; build V^T (k x n).
        let mut v = Array2::<F>::zeros((k, n));
        for i in 0..k {
            for j in 0..n {
                v[[i, j]] = eigenvectors[[j, indices[i]]];
            }
        }
        // Flatten V in row-major (n x k) layout for the GPU product A * V.
        let mut v_col_major = vec![0.0_f64; n * k];
        for i in 0..k {
            for j in 0..n {
                v_col_major[j * k + i] = v[[i, j]].to_f64().unwrap_or(0.0);
            }
        }
        let gpu_v_cols = ctx.create_buffer_from_slice(&v_col_major);
        // U = A * V * S^{-1}; columns with zero singular value stay zero.
        let u = match ctx.gemm(&gpu_a, &gpu_v_cols, m, n, k) {
            Ok(gpu_u) => {
                let u_flat = gpu_u.to_vec();
                let mut u_mat = Array2::<F>::zeros((m, k));
                for i in 0..m {
                    for j in 0..k {
                        let val = F::from(u_flat[i * k + j]).unwrap_or_else(F::zero);
                        if s[j].abs() > F::epsilon() {
                            u_mat[[i, j]] = val / s[j];
                        }
                    }
                }
                u_mat
            }
            Err(_) => {
                // CPU fallback computing the same product A * V column by column.
                let mut u_mat = Array2::<F>::zeros((m, k));
                for i in 0..m {
                    for j in 0..k {
                        let mut sum = F::zero();
                        for p in 0..n {
                            sum += a[[i, p]] * v[[j, p]];
                        }
                        if s[j].abs() > F::epsilon() {
                            u_mat[[i, j]] = sum / s[j];
                        }
                    }
                }
                u_mat
            }
        };
        Ok(SvdFactors { u, s, vt: v })
    } else {
        // Eigenvectors of A*A^T are the left singular vectors; build U (m x k).
        let mut u = Array2::<F>::zeros((m, k));
        for i in 0..m {
            for j in 0..k {
                u[[i, j]] = eigenvectors[[i, indices[j]]];
            }
        }
        // Flatten U^T (k x m) for the GPU product U^T * A.
        let ut_flat: Vec<f64> = {
            let mut flat = vec![0.0_f64; k * m];
            for i in 0..k {
                for j in 0..m {
                    flat[i * m + j] = u[[j, i]].to_f64().unwrap_or(0.0);
                }
            }
            flat
        };
        let gpu_ut = ctx.create_buffer_from_slice(&ut_flat);
        // V^T = S^{-1} * U^T * A; rows with zero singular value stay zero.
        let vt = match ctx.gemm(&gpu_ut, &gpu_a, k, m, n) {
            Ok(gpu_vt) => {
                let vt_flat = gpu_vt.to_vec();
                let mut vt_mat = Array2::<F>::zeros((k, n));
                for i in 0..k {
                    for j in 0..n {
                        let val = F::from(vt_flat[i * n + j]).unwrap_or_else(F::zero);
                        if s[i].abs() > F::epsilon() {
                            vt_mat[[i, j]] = val / s[i];
                        }
                    }
                }
                vt_mat
            }
            Err(_) => {
                // CPU fallback computing the same product U^T * A.
                let mut vt_mat = Array2::<F>::zeros((k, n));
                for i in 0..k {
                    for j in 0..n {
                        let mut sum = F::zero();
                        for p in 0..m {
                            sum += u[[p, i]] * a[[p, j]];
                        }
                        if s[i].abs() > F::epsilon() {
                            vt_mat[[i, j]] = sum / s[i];
                        }
                    }
                }
                vt_mat
            }
        };
        Ok(SvdFactors { u, s, vt })
    }
}
/// Symmetric eigendecomposition via the classical Jacobi rotation method.
///
/// Repeatedly annihilates the largest off-diagonal element with a Givens
/// rotation until all off-diagonal magnitudes fall below `tol` (or the
/// iteration budget is exhausted). Returns the eigenvalues (diagonal of the
/// converged matrix) and the accumulated eigenvector matrix, one eigenvector
/// per column.
///
/// Performance fix: the rotation is now applied in place. The previous
/// version cloned the entire n x n matrix (`new_d = d.clone()`) on every one
/// of up to `100 * n * n` iterations; since a rotation only touches rows and
/// columns `p` and `q`, saving the affected pair of entries per row is
/// mathematically identical and removes the O(n^2) copy per iteration.
fn symmetric_eigen<F>(a: &Array2<F>) -> Result<(Array1<F>, Array2<F>), LinalgError>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    let n = a.nrows();
    let mut d = a.clone();
    // Eigenvector accumulator starts as the identity.
    let mut v = Array2::<F>::zeros((n, n));
    for i in 0..n {
        v[[i, i]] = F::one();
    }
    let max_iter = 100 * n * n;
    let tol = F::epsilon() * F::from(100.0).unwrap_or_else(F::one);
    for _ in 0..max_iter {
        // Locate the largest off-diagonal entry (p, q).
        let mut max_off = F::zero();
        let mut p = 0;
        let mut q = 1;
        for i in 0..n {
            for j in (i + 1)..n {
                let val = d[[i, j]].abs();
                if val > max_off {
                    max_off = val;
                    p = i;
                    q = j;
                }
            }
        }
        if max_off < tol {
            // Converged: remaining off-diagonal mass is negligible.
            break;
        }
        let d_pp = d[[p, p]];
        let d_qq = d[[q, q]];
        let d_pq = d[[p, q]];
        // Rotation angle annihilating d[p, q]; the tan(theta) formula is the
        // numerically stable variant that avoids cancellation.
        let theta = if (d_pp - d_qq).abs() < F::epsilon() {
            F::from(std::f64::consts::FRAC_PI_4).unwrap_or_else(F::one)
        } else {
            let tau = F::from(2.0).unwrap_or_else(F::one) * d_pq / (d_pp - d_qq);
            let t = if tau >= F::zero() {
                F::one() / (tau + (F::one() + tau * tau).sqrt())
            } else {
                -F::one() / (-tau + (F::one() + tau * tau).sqrt())
            };
            t.atan()
        };
        let cos_t = theta.cos();
        let sin_t = theta.sin();
        // Apply the rotation in place; each row i only mixes the saved pair
        // (d_ip, d_iq), and symmetry is maintained by mirroring.
        for i in 0..n {
            if i != p && i != q {
                let d_ip = d[[i, p]];
                let d_iq = d[[i, q]];
                d[[i, p]] = cos_t * d_ip + sin_t * d_iq;
                d[[p, i]] = d[[i, p]];
                d[[i, q]] = -sin_t * d_ip + cos_t * d_iq;
                d[[q, i]] = d[[i, q]];
            }
        }
        d[[p, p]] = cos_t * cos_t * d_pp
            + F::from(2.0).unwrap_or_else(F::one) * cos_t * sin_t * d_pq
            + sin_t * sin_t * d_qq;
        d[[q, q]] = sin_t * sin_t * d_pp
            - F::from(2.0).unwrap_or_else(F::one) * cos_t * sin_t * d_pq
            + cos_t * cos_t * d_qq;
        d[[p, q]] = F::zero();
        d[[q, p]] = F::zero();
        // Accumulate the rotation into the eigenvector matrix.
        for i in 0..n {
            let v_ip = v[[i, p]];
            let v_iq = v[[i, q]];
            v[[i, p]] = cos_t * v_ip + sin_t * v_iq;
            v[[i, q]] = -sin_t * v_ip + cos_t * v_iq;
        }
    }
    let mut eigenvalues = Array1::<F>::zeros(n);
    for i in 0..n {
        eigenvalues[i] = d[[i, i]];
    }
    Ok((eigenvalues, v))
}
/// CPU fallback: delegate to the crate-level SVD routine (economy mode) and
/// repackage the resulting (U, S, V^T) triple into [`SvdFactors`].
fn cpu_svd<F>(a: &ArrayView2<F>) -> LinalgResult<SvdFactors<F>>
where
    F: Float + NumAssign + Sum + Send + Sync + ScalarOperand + 'static,
{
    crate::svd(a, false, None).map(|(u, s, vt)| SvdFactors { u, s, vt })
}
/// Reject matrices that are empty or contain NaN/infinite entries.
///
/// `operation` names the caller's operation so the error message identifies
/// which decomposition rejected the input.
fn validate_matrix<F>(a: &ArrayView2<F>, operation: &str) -> LinalgResult<()>
where
    F: Float,
{
    if a.is_empty() {
        return Err(LinalgError::ShapeError(format!(
            "{} failed: Input matrix cannot be empty",
            operation
        )));
    }
    if a.iter().any(|v| !v.is_finite()) {
        return Err(LinalgError::InvalidInputError(format!(
            "{} failed: Matrix contains non-finite values",
            operation
        )));
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;
    // CPU LU: the factors must satisfy P*A == L*U.
    #[test]
    fn test_cpu_lu_basic() {
        let a = array![[2.0_f64, 1.0], [4.0, 3.0]];
        let factors = cpu_lu(&a.view()).expect("LU failed");
        let pa = factors.p.dot(&a);
        let lu = factors.l.dot(&factors.u);
        for i in 0..2 {
            for j in 0..2 {
                assert_relative_eq!(pa[[i, j]], lu[[i, j]], epsilon = 1e-10);
            }
        }
    }
    // With no GPU context, gpu_lu must silently take the CPU path.
    #[test]
    fn test_gpu_lu_fallback() {
        let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
        let factors = gpu_lu(None, &a.view()).expect("LU fallback failed");
        let pa = factors.p.dot(&a);
        let lu = factors.l.dot(&factors.u);
        for i in 0..2 {
            for j in 0..2 {
                assert_relative_eq!(pa[[i, j]], lu[[i, j]], epsilon = 1e-10);
            }
        }
    }
    // Empty input must be rejected by validation.
    #[test]
    fn test_lu_empty_matrix() {
        let a = Array2::<f64>::zeros((0, 0));
        let result = gpu_lu(None, &a.view());
        assert!(result.is_err());
    }
    // 3x3 case exercises pivoting across multiple columns.
    #[test]
    fn test_lu_3x3() {
        let a = array![[1.0_f64, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 10.0]];
        let factors = gpu_lu(None, &a.view()).expect("3x3 LU failed");
        let pa = factors.p.dot(&a);
        let lu = factors.l.dot(&factors.u);
        for i in 0..3 {
            for j in 0..3 {
                assert_relative_eq!(pa[[i, j]], lu[[i, j]], epsilon = 1e-10);
            }
        }
    }
    // CPU QR: Q*R must reconstruct the input.
    #[test]
    fn test_cpu_qr_basic() {
        let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
        let factors = cpu_qr(&a.view()).expect("QR failed");
        let qr_product = factors.q.dot(&factors.r);
        for i in 0..2 {
            for j in 0..2 {
                assert_relative_eq!(qr_product[[i, j]], a[[i, j]], epsilon = 1e-10);
            }
        }
    }
    #[test]
    fn test_gpu_qr_fallback() {
        let a = array![
            [12.0_f64, -51.0, 4.0],
            [6.0, 167.0, -68.0],
            [-4.0, 24.0, -41.0]
        ];
        let factors = gpu_qr(None, &a.view()).expect("QR fallback failed");
        let qr_product = factors.q.dot(&factors.r);
        for i in 0..3 {
            for j in 0..3 {
                assert_relative_eq!(qr_product[[i, j]], a[[i, j]], epsilon = 1e-8);
            }
        }
    }
    #[test]
    fn test_qr_empty_matrix() {
        let a = Array2::<f64>::zeros((0, 0));
        let result = gpu_qr(None, &a.view());
        assert!(result.is_err());
    }
    // CPU Cholesky: L*L^T must reconstruct the (SPD) input.
    #[test]
    fn test_cpu_cholesky_basic() {
        let a = array![[4.0_f64, 2.0], [2.0, 5.0]];
        let factors = cpu_cholesky(&a.view()).expect("Cholesky failed");
        let llt = factors.l.dot(&factors.l.t());
        for i in 0..2 {
            for j in 0..2 {
                assert_relative_eq!(llt[[i, j]], a[[i, j]], epsilon = 1e-10);
            }
        }
    }
    #[test]
    fn test_gpu_cholesky_fallback() {
        let a = array![[25.0_f64, 15.0, -5.0], [15.0, 18.0, 0.0], [-5.0, 0.0, 11.0]];
        let factors = gpu_cholesky(None, &a.view()).expect("Cholesky fallback failed");
        let llt = factors.l.dot(&factors.l.t());
        for i in 0..3 {
            for j in 0..3 {
                assert_relative_eq!(llt[[i, j]], a[[i, j]], epsilon = 1e-10);
            }
        }
    }
    // Cholesky is only defined for square matrices.
    #[test]
    fn test_cholesky_non_square() {
        let a = array![[1.0_f64, 2.0, 3.0], [4.0, 5.0, 6.0]];
        let result = gpu_cholesky(None, &a.view());
        assert!(result.is_err());
    }
    #[test]
    fn test_cholesky_empty() {
        let a = Array2::<f64>::zeros((0, 0));
        let result = gpu_cholesky(None, &a.view());
        assert!(result.is_err());
    }
    // CPU SVD: U*S*V^T must reconstruct the input.
    #[test]
    fn test_cpu_svd_basic() {
        let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
        let factors = cpu_svd(&a.view()).expect("SVD failed");
        let s_diag = Array2::from_diag(&factors.s);
        let usv = factors.u.dot(&s_diag).dot(&factors.vt);
        for i in 0..2 {
            for j in 0..2 {
                assert_relative_eq!(usv[[i, j]], a[[i, j]], epsilon = 1e-10);
            }
        }
    }
    // Singular values of diag(1, 2) are exactly {2, 1}.
    #[test]
    fn test_gpu_svd_fallback() {
        let a = array![[1.0_f64, 0.0], [0.0, 2.0]];
        let factors = gpu_svd(None, &a.view()).expect("SVD fallback failed");
        let mut s_sorted: Vec<f64> = factors.s.iter().copied().collect();
        s_sorted.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
        assert_relative_eq!(s_sorted[0], 2.0, epsilon = 1e-10);
        assert_relative_eq!(s_sorted[1], 1.0, epsilon = 1e-10);
    }
    #[test]
    fn test_svd_empty_matrix() {
        let a = Array2::<f64>::zeros((0, 0));
        let result = gpu_svd(None, &a.view());
        assert!(result.is_err());
    }
    #[test]
    fn test_svd_reconstruction() {
        let a = array![[1.0_f64, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 10.0]];
        let factors = gpu_svd(None, &a.view()).expect("SVD failed");
        let s_diag = Array2::from_diag(&factors.s);
        let usv = factors.u.dot(&s_diag).dot(&factors.vt);
        for i in 0..3 {
            for j in 0..3 {
                assert_relative_eq!(usv[[i, j]], a[[i, j]], epsilon = 1e-8);
            }
        }
    }
    // Validation must reject NaN entries.
    #[test]
    fn test_validate_non_finite() {
        let a = array![[1.0_f64, f64::NAN], [3.0, 4.0]];
        let result = validate_matrix(&a.view(), "test");
        assert!(result.is_err());
    }
    // Validation must reject infinite entries.
    #[test]
    fn test_validate_infinity() {
        let a = array![[1.0_f64, f64::INFINITY], [3.0, 4.0]];
        let result = validate_matrix(&a.view(), "test");
        assert!(result.is_err());
    }
    // Jacobi eigensolver on a diagonal matrix returns the diagonal entries.
    #[test]
    fn test_symmetric_eigen_diagonal() {
        let a = array![[3.0_f64, 0.0], [0.0, 1.0]];
        let (eigenvalues, eigenvectors) = symmetric_eigen(&a).expect("eigen failed");
        let mut ev_sorted: Vec<f64> = eigenvalues.iter().copied().collect();
        ev_sorted.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
        assert_relative_eq!(ev_sorted[0], 3.0, epsilon = 1e-10);
        assert_relative_eq!(ev_sorted[1], 1.0, epsilon = 1e-10);
    }
    // All eigenvalues of the identity are 1.
    #[test]
    fn test_symmetric_eigen_identity() {
        let a = array![[1.0_f64, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];
        let (eigenvalues, _) = symmetric_eigen(&a).expect("eigen identity failed");
        for &ev in eigenvalues.iter() {
            assert_relative_eq!(ev, 1.0, epsilon = 1e-10);
        }
    }
    // Exercise the GPU entry point with the CPU-emulated backend when available.
    #[test]
    fn test_gpu_lu_with_cpu_context() {
        match GpuContext::new(GpuBackend::Cpu) {
            Ok(gpu_ctx) => {
                let a = array![[2.0_f64, 1.0], [4.0, 3.0]];
                let factors = gpu_lu(Some(&gpu_ctx), &a.view()).expect("LU with ctx failed");
                let pa = factors.p.dot(&a);
                let lu = factors.l.dot(&factors.u);
                for i in 0..2 {
                    for j in 0..2 {
                        assert_relative_eq!(pa[[i, j]], lu[[i, j]], epsilon = 1e-10);
                    }
                }
            }
            Err(_) => {
                // No CPU-backend context available in this environment; skip.
            }
        }
    }
    #[test]
    fn test_gpu_qr_with_cpu_context() {
        if let Ok(gpu_ctx) = GpuContext::new(GpuBackend::Cpu) {
            let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
            let factors = gpu_qr(Some(&gpu_ctx), &a.view()).expect("QR with ctx failed");
            let qr_product = factors.q.dot(&factors.r);
            for i in 0..2 {
                for j in 0..2 {
                    assert_relative_eq!(qr_product[[i, j]], a[[i, j]], epsilon = 1e-10);
                }
            }
        }
    }
    #[test]
    fn test_gpu_cholesky_with_cpu_context() {
        if let Ok(gpu_ctx) = GpuContext::new(GpuBackend::Cpu) {
            let a = array![[4.0_f64, 2.0], [2.0, 5.0]];
            let factors =
                gpu_cholesky(Some(&gpu_ctx), &a.view()).expect("Cholesky with ctx failed");
            let llt = factors.l.dot(&factors.l.t());
            for i in 0..2 {
                for j in 0..2 {
                    assert_relative_eq!(llt[[i, j]], a[[i, j]], epsilon = 1e-10);
                }
            }
        }
    }
    #[test]
    fn test_gpu_svd_with_cpu_context() {
        if let Ok(gpu_ctx) = GpuContext::new(GpuBackend::Cpu) {
            let a = array![[1.0_f64, 0.0], [0.0, 2.0]];
            let factors = gpu_svd(Some(&gpu_ctx), &a.view()).expect("SVD with ctx failed");
            let s_diag = Array2::from_diag(&factors.s);
            let usv = factors.u.dot(&s_diag).dot(&factors.vt);
            for i in 0..2 {
                for j in 0..2 {
                    assert_relative_eq!(usv[[i, j]], a[[i, j]], epsilon = 1e-8);
                }
            }
        }
    }
}