use scirs2_autograd::ndarray::{s, Array1, Array2, ArrayView2, ArrayViewMut2};
#[cfg(feature = "no-std")]
use alloc::{string::ToString, vec::Vec};
#[cfg(not(feature = "no-std"))]
use std::{string::ToString, vec::Vec};
pub unsafe fn matrix_multiply(
a: *const f32,
b: *const f32,
c: *mut f32,
m: usize,
n: usize,
k: usize,
) -> Result<(), crate::traits::SimdError> {
if a.is_null() || b.is_null() || c.is_null() {
return Err(crate::traits::SimdError::InvalidInput(
"Null pointer provided".to_string(),
));
}
for i in 0..m {
for j in 0..n {
let mut sum = 0.0f32;
for ki in 0..k {
sum += *a.add(i * k + ki) * *b.add(ki * n + j);
}
*c.add(i * n + j) = sum;
}
}
Ok(())
}
pub fn matrix_multiply_f32_simd(a: &Array2<f32>, b: &Array2<f32>) -> Array2<f32> {
let (m, k) = a.dim();
let (k2, n) = b.dim();
assert_eq!(k, k2, "Matrix dimensions must match for multiplication");
let mut result = Array2::zeros((m, n));
const BLOCK_SIZE: usize = 64;
for i_block in (0..m).step_by(BLOCK_SIZE) {
for j_block in (0..n).step_by(BLOCK_SIZE) {
for k_block in (0..k).step_by(BLOCK_SIZE) {
let i_end = (i_block + BLOCK_SIZE).min(m);
let j_end = (j_block + BLOCK_SIZE).min(n);
let k_end = (k_block + BLOCK_SIZE).min(k);
matrix_multiply_block_simd(
&a.slice(s![i_block..i_end, k_block..k_end]),
&b.slice(s![k_block..k_end, j_block..j_end]),
&mut result.slice_mut(s![i_block..i_end, j_block..j_end]),
);
}
}
}
result
}
fn matrix_multiply_block_simd(
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
) {
let (_m, _k) = a.dim();
let (_, _n) = b.dim();
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") && crate::simd_feature_detected!("fma") {
unsafe { matrix_multiply_avx2_fma(a, b, c) };
return;
} else if crate::simd_feature_detected!("avx2") {
unsafe { matrix_multiply_avx2(a, b, c) };
return;
} else if crate::simd_feature_detected!("sse2") {
unsafe { matrix_multiply_sse2(a, b, c) };
return;
}
}
matrix_multiply_scalar(a, b, c);
}
fn matrix_multiply_scalar(a: &ArrayView2<f32>, b: &ArrayView2<f32>, c: &mut ArrayViewMut2<f32>) {
let (m, k) = a.dim();
let (_, n) = b.dim();
for i in 0..m {
for j in 0..n {
let mut sum = c[[i, j]]; for ki in 0..k {
sum += a[[i, ki]] * b[[ki, j]];
}
c[[i, j]] = sum;
}
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn matrix_multiply_sse2(
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
) {
use core::arch::x86_64::*;
let (m, k) = a.dim();
let (_, n) = b.dim();
for i in 0..m {
for j in (0..n).step_by(4) {
let mut sum = if j + 4 <= n {
_mm_loadu_ps(&c[[i, j]])
} else {
_mm_setzero_ps()
};
for ki in 0..k {
let a_val = _mm_set1_ps(a[[i, ki]]);
if j + 4 <= n {
let b_vec = _mm_loadu_ps(&b[[ki, j]]);
sum = _mm_add_ps(sum, _mm_mul_ps(a_val, b_vec));
} else {
let mut b_vals = [0.0f32; 4];
for jj in 0..(n - j) {
b_vals[jj] = b[[ki, j + jj]];
}
let b_vec = _mm_loadu_ps(b_vals.as_ptr());
sum = _mm_add_ps(sum, _mm_mul_ps(a_val, b_vec));
}
}
if j + 4 <= n {
_mm_storeu_ps(&mut c[[i, j]], sum);
} else {
let mut result = [0.0f32; 4];
_mm_storeu_ps(result.as_mut_ptr(), sum);
for jj in 0..(n - j) {
c[[i, j + jj]] = result[jj];
}
}
}
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn matrix_multiply_avx2(
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
) {
use core::arch::x86_64::*;
let (m, k) = a.dim();
let (_, n) = b.dim();
for i in 0..m {
for j in (0..n).step_by(8) {
let mut sum = if j + 8 <= n {
_mm256_loadu_ps(&c[[i, j]])
} else {
_mm256_setzero_ps()
};
for ki in 0..k {
let a_val = _mm256_set1_ps(a[[i, ki]]);
if j + 8 <= n {
let b_vec = _mm256_loadu_ps(&b[[ki, j]]);
sum = _mm256_add_ps(sum, _mm256_mul_ps(a_val, b_vec));
} else {
let mut b_vals = [0.0f32; 8];
for jj in 0..(n - j) {
b_vals[jj] = b[[ki, j + jj]];
}
let b_vec = _mm256_loadu_ps(b_vals.as_ptr());
sum = _mm256_add_ps(sum, _mm256_mul_ps(a_val, b_vec));
}
}
if j + 8 <= n {
_mm256_storeu_ps(&mut c[[i, j]], sum);
} else {
let mut result = [0.0f32; 8];
_mm256_storeu_ps(result.as_mut_ptr(), sum);
for jj in 0..(n - j) {
c[[i, j + jj]] = result[jj];
}
}
}
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn matrix_multiply_avx2_fma(
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
) {
use core::arch::x86_64::*;
let (m, k) = a.dim();
let (_, n) = b.dim();
for i in 0..m {
for j in (0..n).step_by(8) {
let mut sum = if j + 8 <= n {
_mm256_loadu_ps(&c[[i, j]])
} else {
_mm256_setzero_ps()
};
for ki in 0..k {
let a_val = _mm256_set1_ps(a[[i, ki]]);
if j + 8 <= n {
let b_vec = _mm256_loadu_ps(&b[[ki, j]]);
sum = _mm256_fmadd_ps(a_val, b_vec, sum);
} else {
let mut b_vals = [0.0f32; 8];
for jj in 0..(n - j) {
b_vals[jj] = b[[ki, j + jj]];
}
let b_vec = _mm256_loadu_ps(b_vals.as_ptr());
sum = _mm256_fmadd_ps(a_val, b_vec, sum);
}
}
if j + 8 <= n {
_mm256_storeu_ps(&mut c[[i, j]], sum);
} else {
let mut result = [0.0f32; 8];
_mm256_storeu_ps(result.as_mut_ptr(), sum);
for jj in 0..(n - j) {
c[[i, j + jj]] = result[jj];
}
}
}
}
}
pub fn matrix_vector_multiply_f32(matrix: &Array2<f32>, vector: &Array1<f32>) -> Array1<f32> {
let (m, n) = matrix.dim();
assert_eq!(n, vector.len(), "Matrix columns must match vector length");
let mut result = Array1::zeros(m);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") && crate::simd_feature_detected!("fma") {
unsafe { matrix_vector_multiply_avx2_fma(matrix, vector, &mut result) };
return result;
} else if crate::simd_feature_detected!("avx2") {
unsafe { matrix_vector_multiply_avx2(matrix, vector, &mut result) };
return result;
} else if crate::simd_feature_detected!("sse2") {
unsafe { matrix_vector_multiply_sse2(matrix, vector, &mut result) };
return result;
}
}
for i in 0..m {
let mut sum = 0.0;
for j in 0..n {
sum += matrix[[i, j]] * vector[j];
}
result[i] = sum;
}
result
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn matrix_vector_multiply_sse2(
matrix: &Array2<f32>,
vector: &Array1<f32>,
result: &mut Array1<f32>,
) {
use core::arch::x86_64::*;
let (m, n) = matrix.dim();
for i in 0..m {
let mut sum = _mm_setzero_ps();
let mut j = 0;
while j + 4 <= n {
let m_vec = _mm_loadu_ps(&matrix[[i, j]]);
let v_vec = _mm_loadu_ps(&vector[j]);
sum = _mm_add_ps(sum, _mm_mul_ps(m_vec, v_vec));
j += 4;
}
let mut result_array = [0.0f32; 4];
_mm_storeu_ps(result_array.as_mut_ptr(), sum);
let mut scalar_sum = result_array.iter().sum::<f32>();
while j < n {
scalar_sum += matrix[[i, j]] * vector[j];
j += 1;
}
result[i] = scalar_sum;
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn matrix_vector_multiply_avx2(
matrix: &Array2<f32>,
vector: &Array1<f32>,
result: &mut Array1<f32>,
) {
use core::arch::x86_64::*;
let (m, n) = matrix.dim();
for i in 0..m {
let mut sum = _mm256_setzero_ps();
let mut j = 0;
while j + 8 <= n {
let m_vec = _mm256_loadu_ps(&matrix[[i, j]]);
let v_vec = _mm256_loadu_ps(&vector[j]);
sum = _mm256_add_ps(sum, _mm256_mul_ps(m_vec, v_vec));
j += 8;
}
let mut result_array = [0.0f32; 8];
_mm256_storeu_ps(result_array.as_mut_ptr(), sum);
let mut scalar_sum = result_array.iter().sum::<f32>();
while j < n {
scalar_sum += matrix[[i, j]] * vector[j];
j += 1;
}
result[i] = scalar_sum;
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn matrix_vector_multiply_avx2_fma(
matrix: &Array2<f32>,
vector: &Array1<f32>,
result: &mut Array1<f32>,
) {
use core::arch::x86_64::*;
let (m, n) = matrix.dim();
for i in 0..m {
let mut sum = _mm256_setzero_ps();
let mut j = 0;
while j + 8 <= n {
let m_vec = _mm256_loadu_ps(&matrix[[i, j]]);
let v_vec = _mm256_loadu_ps(&vector[j]);
sum = _mm256_fmadd_ps(m_vec, v_vec, sum);
j += 8;
}
let mut result_array = [0.0f32; 8];
_mm256_storeu_ps(result_array.as_mut_ptr(), sum);
let mut scalar_sum = result_array.iter().sum::<f32>();
while j < n {
scalar_sum += matrix[[i, j]] * vector[j];
j += 1;
}
result[i] = scalar_sum;
}
}
pub fn elementwise_add_simd(a: &Array2<f32>, b: &Array2<f32>) -> Array2<f32> {
assert_eq!(a.shape(), b.shape(), "Arrays must have the same shape");
let mut result = Array2::zeros(a.dim());
if let (Some(a_slice), Some(b_slice), Some(result_slice)) =
(a.as_slice(), b.as_slice(), result.as_slice_mut())
{
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") {
unsafe { elementwise_add_avx2(a_slice, b_slice, result_slice) };
return result;
} else if crate::simd_feature_detected!("sse2") {
unsafe { elementwise_add_sse2(a_slice, b_slice, result_slice) };
return result;
}
}
for i in 0..a_slice.len() {
result_slice[i] = a_slice[i] + b_slice[i];
}
}
result
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn elementwise_add_sse2(a: &[f32], b: &[f32], result: &mut [f32]) {
use core::arch::x86_64::*;
let mut i = 0;
while i + 4 <= a.len() {
let a_vec = _mm_loadu_ps(a.as_ptr().add(i));
let b_vec = _mm_loadu_ps(b.as_ptr().add(i));
let sum = _mm_add_ps(a_vec, b_vec);
_mm_storeu_ps(result.as_mut_ptr().add(i), sum);
i += 4;
}
while i < a.len() {
result[i] = a[i] + b[i];
i += 1;
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn elementwise_add_avx2(a: &[f32], b: &[f32], result: &mut [f32]) {
use core::arch::x86_64::*;
let mut i = 0;
while i + 8 <= a.len() {
let a_vec = _mm256_loadu_ps(a.as_ptr().add(i));
let b_vec = _mm256_loadu_ps(b.as_ptr().add(i));
let sum = _mm256_add_ps(a_vec, b_vec);
_mm256_storeu_ps(result.as_mut_ptr().add(i), sum);
i += 8;
}
while i < a.len() {
result[i] = a[i] + b[i];
i += 1;
}
}
pub fn transpose_simd(matrix: &Array2<f32>) -> Array2<f32> {
let (m, n) = matrix.dim();
let mut result = Array2::zeros((n, m));
const BLOCK_SIZE: usize = 64;
for i_block in (0..m).step_by(BLOCK_SIZE) {
for j_block in (0..n).step_by(BLOCK_SIZE) {
let i_end = (i_block + BLOCK_SIZE).min(m);
let j_end = (j_block + BLOCK_SIZE).min(n);
for i in i_block..i_end {
for j in j_block..j_end {
result[[j, i]] = matrix[[i, j]];
}
}
}
}
result
}
pub fn matrix_sum_simd(matrix: &Array2<f32>) -> f32 {
if let Some(slice) = matrix.as_slice() {
crate::vector::sum(slice)
} else {
matrix.iter().sum()
}
}
pub fn matrix_mean_simd(matrix: &Array2<f32>) -> f32 {
if let Some(slice) = matrix.as_slice() {
crate::vector::mean(slice)
} else {
matrix.iter().sum::<f32>() / matrix.len() as f32
}
}
pub fn matrix_variance_simd(matrix: &Array2<f32>) -> f32 {
if let Some(slice) = matrix.as_slice() {
crate::vector::variance(slice)
} else {
let mean = matrix_mean_simd(matrix);
let sum_squared_diff: f32 = matrix
.iter()
.map(|&x| {
let diff = x - mean;
diff * diff
})
.sum();
sum_squared_diff / (matrix.len() - 1).max(1) as f32
}
}
pub fn qr_decomposition_simd(matrix: &Array2<f32>) -> (Array2<f32>, Array2<f32>) {
let (m, n) = matrix.dim();
let mut q = Array2::eye(m);
let mut r = matrix.clone();
for k in 0..n.min(m - 1) {
let col = r.slice(s![k.., k]).to_owned();
let (householder_vec, tau) = compute_householder_vector(&col);
apply_householder_left(&householder_vec, tau, &mut r.slice_mut(s![k.., k..]));
apply_householder_right(&householder_vec, tau, &mut q.slice_mut(s![.., k..]));
}
(q.t().to_owned(), r)
}
fn compute_householder_vector(x: &Array1<f32>) -> (Array1<f32>, f32) {
let n = x.len();
if n == 0 {
return (Array1::zeros(0), 0.0);
}
let mut v = x.clone();
let norm_x = crate::vector::norm_l2(x.as_slice().expect("slice operation should succeed"));
if norm_x < 1e-10 {
return (v, 0.0);
}
let sigma = if x[0] >= 0.0 { norm_x } else { -norm_x };
v[0] += sigma;
let norm_v = crate::vector::norm_l2(v.as_slice().expect("slice operation should succeed"));
if norm_v < 1e-10 {
return (v, 0.0);
}
for val in v.iter_mut() {
*val /= norm_v;
}
let tau = 2.0;
(v, tau)
}
fn apply_householder_left(v: &Array1<f32>, tau: f32, a: &mut ArrayViewMut2<f32>) {
if tau == 0.0 {
return;
}
let (m, n) = a.dim();
for j in 0..n {
let mut col = a.column_mut(j);
let dot_product = crate::vector::dot_product(
v.as_slice().expect("slice operation should succeed"),
col.to_owned()
.as_slice()
.expect("slice operation should succeed"),
);
let factor = tau * dot_product;
for i in 0..m {
col[i] -= factor * v[i];
}
}
}
fn apply_householder_right(v: &Array1<f32>, tau: f32, a: &mut ArrayViewMut2<f32>) {
if tau == 0.0 {
return;
}
let (m, n) = a.dim();
for i in 0..m {
let mut row = a.row_mut(i);
let dot_product = crate::vector::dot_product(
row.to_owned()
.as_slice()
.expect("matrix indexing should be valid"),
v.as_slice().expect("matrix indexing should be valid"),
);
let factor = tau * dot_product;
for j in 0..n {
row[j] -= factor * v[j];
}
}
}
pub fn svd_simd(matrix: &Array2<f32>) -> (Array2<f32>, Array1<f32>, Array2<f32>) {
let (m, n) = matrix.dim();
let (q1, r) = qr_decomposition_simd(matrix);
let (q2, r2) = qr_decomposition_simd(&r.t().to_owned());
let min_dim = m.min(n);
let mut singular_values = Array1::zeros(min_dim);
for i in 0..min_dim {
if i < r2.nrows() && i < r2.ncols() {
singular_values[i] = r2[[i, i]].abs();
}
}
let mut indices: Vec<usize> = (0..min_dim).collect();
indices.sort_by(|&a, &b| {
singular_values[b]
.partial_cmp(&singular_values[a])
.expect("operation should succeed")
});
let mut sorted_values = Array1::zeros(min_dim);
for (i, &idx) in indices.iter().enumerate() {
sorted_values[i] = singular_values[idx];
}
(q1, sorted_values, q2.t().to_owned())
}
pub fn power_iteration_simd(
matrix: &Array2<f32>,
max_iterations: usize,
tolerance: f32,
) -> (f32, Array1<f32>) {
let n = matrix.nrows();
assert_eq!(
n,
matrix.ncols(),
"Matrix must be square for eigenvalue computation"
);
let mut v = Array1::from_elem(n, 1.0 / (n as f32).sqrt());
let mut eigenvalue = 0.0;
for _ in 0..max_iterations {
let v_new = matrix_vector_multiply_f32(matrix, &v);
let new_eigenvalue = crate::vector::dot_product(
v.as_slice().expect("slice operation should succeed"),
v_new.as_slice().expect("slice operation should succeed"),
);
let norm =
crate::vector::norm_l2(v_new.as_slice().expect("slice operation should succeed"));
if norm < 1e-10 {
break;
}
v = v_new / norm;
if (new_eigenvalue - eigenvalue).abs() < tolerance {
eigenvalue = new_eigenvalue;
break;
}
eigenvalue = new_eigenvalue;
}
(eigenvalue, v)
}
pub fn eigenvalues_simd(matrix: &Array2<f32>, num_eigenvalues: usize) -> Vec<(f32, Array1<f32>)> {
let n = matrix.nrows();
assert_eq!(n, matrix.ncols(), "Matrix must be square");
let mut eigenvalues = Vec::new();
let mut a = matrix.clone();
for _ in 0..num_eigenvalues.min(n) {
let (eigenvalue, eigenvector) = power_iteration_simd(&a, 1000, 1e-6);
if eigenvalue.abs() < 1e-10 {
break;
}
eigenvalues.push((eigenvalue, eigenvector.clone()));
let outer_product = compute_outer_product_simd(&eigenvector, &eigenvector);
let deflation = &outer_product * eigenvalue;
a = &a - &deflation;
}
eigenvalues
}
pub fn compute_outer_product_simd(a: &Array1<f32>, b: &Array1<f32>) -> Array2<f32> {
let m = a.len();
let n = b.len();
let mut result = Array2::zeros((m, n));
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if crate::simd_feature_detected!("avx2") {
unsafe { outer_product_avx2(a, b, &mut result) };
return result;
} else if crate::simd_feature_detected!("sse2") {
unsafe { outer_product_sse2(a, b, &mut result) };
return result;
}
}
for i in 0..m {
for j in 0..n {
result[[i, j]] = a[i] * b[j];
}
}
result
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "sse2")]
unsafe fn outer_product_sse2(a: &Array1<f32>, b: &Array1<f32>, result: &mut Array2<f32>) {
use core::arch::x86_64::*;
let m = a.len();
let n = b.len();
for i in 0..m {
let a_val = _mm_set1_ps(a[i]);
let mut j = 0;
while j + 4 <= n {
let b_vec = _mm_loadu_ps(&b[j]);
let product = _mm_mul_ps(a_val, b_vec);
_mm_storeu_ps(&mut result[[i, j]], product);
j += 4;
}
while j < n {
result[[i, j]] = a[i] * b[j];
j += 1;
}
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn outer_product_avx2(a: &Array1<f32>, b: &Array1<f32>, result: &mut Array2<f32>) {
use core::arch::x86_64::*;
let m = a.len();
let n = b.len();
for i in 0..m {
let a_val = _mm256_set1_ps(a[i]);
let mut j = 0;
while j + 8 <= n {
let b_vec = _mm256_loadu_ps(&b[j]);
let product = _mm256_mul_ps(a_val, b_vec);
_mm256_storeu_ps(&mut result[[i, j]], product);
j += 8;
}
while j < n {
result[[i, j]] = a[i] * b[j];
j += 1;
}
}
}
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
use super::*;
use approx::assert_relative_eq;
#[cfg(feature = "no-std")]
use alloc::{vec, vec::Vec};
#[test]
fn test_matrix_multiply_simd() {
let a = Array2::from_shape_vec((3, 4), (0..12).map(|x| x as f32).collect())
.expect("shape and data length should match");
let b = Array2::from_shape_vec((4, 2), (0..8).map(|x| x as f32 + 1.0).collect())
.expect("shape and data length should match");
let result = matrix_multiply_f32_simd(&a, &b);
let expected = a.dot(&b);
for i in 0..result.nrows() {
for j in 0..result.ncols() {
assert_relative_eq!(result[[i, j]], expected[[i, j]], epsilon = 1e-5);
}
}
}
#[test]
fn test_matrix_vector_multiply() {
let matrix = Array2::from_shape_vec((3, 4), (0..12).map(|x| x as f32).collect())
.expect("shape and data length should match");
let vector = Array1::from_vec((0..4).map(|x| x as f32 + 1.0).collect());
let result = matrix_vector_multiply_f32(&matrix, &vector);
let expected = matrix.dot(&vector);
for i in 0..result.len() {
assert_relative_eq!(result[i], expected[i], epsilon = 1e-5);
}
}
#[test]
fn test_elementwise_add_simd() {
let a = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
.expect("shape and data length should match");
let b = Array2::from_shape_vec((2, 3), vec![10.0, 20.0, 30.0, 40.0, 50.0, 60.0])
.expect("shape and data length should match");
let result = elementwise_add_simd(&a, &b);
let expected = &a + &b;
for i in 0..result.nrows() {
for j in 0..result.ncols() {
assert_relative_eq!(result[[i, j]], expected[[i, j]], epsilon = 1e-6);
}
}
}
#[test]
fn test_transpose_simd() {
let matrix = Array2::from_shape_vec((3, 4), (0..12).map(|x| x as f32).collect())
.expect("shape and data length should match");
let result = transpose_simd(&matrix);
let expected = matrix.t();
assert_eq!(result.shape(), expected.shape());
for i in 0..result.nrows() {
for j in 0..result.ncols() {
assert_relative_eq!(result[[i, j]], expected[[i, j]], epsilon = 1e-6);
}
}
}
#[test]
fn test_matrix_reductions() {
let matrix = Array2::from_shape_vec((3, 4), (1..13).map(|x| x as f32).collect())
.expect("shape and data length should match");
let sum = matrix_sum_simd(&matrix);
let expected_sum = matrix.sum();
assert_relative_eq!(sum, expected_sum, epsilon = 1e-5);
let mean = matrix_mean_simd(&matrix);
let expected_mean = matrix
.mean()
.expect("array should have elements for mean computation");
assert_relative_eq!(mean, expected_mean, epsilon = 1e-5);
let variance = matrix_variance_simd(&matrix);
assert!(variance > 0.0); }
#[test]
fn test_qr_decomposition() {
let matrix = Array2::from_shape_vec((3, 2), vec![1.0, 0.0, 0.0, 1.0, 1.0, 1.0])
.expect("shape and data length should match");
let (q, r) = qr_decomposition_simd(&matrix);
assert_eq!(q.shape(), &[3, 3]);
assert_eq!(r.shape(), &[3, 2]);
assert!(!q.is_empty());
assert!(!r.is_empty());
}
#[test]
fn test_power_iteration() {
let matrix =
Array2::from_shape_vec((3, 3), vec![4.0, 1.0, 1.0, 1.0, 3.0, 2.0, 1.0, 2.0, 3.0])
.expect("operation should succeed");
let (eigenvalue, eigenvector) = power_iteration_simd(&matrix, 1000, 1e-6);
assert!(eigenvalue > 0.0);
assert!(eigenvalue < 10.0);
let norm = crate::vector::norm_l2(
eigenvector
.as_slice()
.expect("slice operation should succeed"),
);
assert_relative_eq!(norm, 1.0, epsilon = 1e-5);
}
#[test]
fn test_outer_product() {
let a = Array1::from_vec(vec![1.0, 2.0, 3.0]);
let b = Array1::from_vec(vec![4.0, 5.0]);
let result = compute_outer_product_simd(&a, &b);
assert_eq!(result.shape(), &[3, 2]);
assert_relative_eq!(result[[0, 0]], 4.0, epsilon = 1e-6);
assert_relative_eq!(result[[0, 1]], 5.0, epsilon = 1e-6);
assert_relative_eq!(result[[1, 0]], 8.0, epsilon = 1e-6);
assert_relative_eq!(result[[1, 1]], 10.0, epsilon = 1e-6);
assert_relative_eq!(result[[2, 0]], 12.0, epsilon = 1e-6);
assert_relative_eq!(result[[2, 1]], 15.0, epsilon = 1e-6);
}
#[test]
fn test_svd_basic() {
let matrix = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
.expect("shape and data length should match");
let (u, s, vt) = svd_simd(&matrix);
assert_eq!(u.shape(), &[3, 3]);
assert_eq!(s.len(), 2);
assert_eq!(vt.shape(), &[2, 2]);
for i in 0..s.len() {
assert!(s[i] >= 0.0);
if i > 0 {
assert!(s[i - 1] >= s[i]);
}
}
}
}