use crate::error::{LinalgError, LinalgResult};
use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, ScalarOperand};
use scirs2_core::numeric::{Float, NumAssign, One, Zero};
use std::iter::Sum;
use super::super::core::{QRWorkItem, WorkItem};
use super::super::scheduler::WorkStealingScheduler;
use super::gemm::parallel_matvec_work_stealing;
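/// Cholesky decomposition (`A = L * L^T`) with a work-stealing scheduler.
///
/// The diagonal entry of each column is computed sequentially; the updates to
/// the entries below it are then fanned out as one work item per row. Each
/// work item carries its own clone of `L` and of the input matrix, which keeps
/// the workers independent at the cost of extra copies.
///
/// Returns a `ShapeError` if the input is not square. The input is assumed to
/// be symmetric positive definite; see the note inside the loop.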
pub fn parallel_cholesky_work_stealing<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
) -> LinalgResult<Array2<F>>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = matrix.nrows();
if n != matrix.ncols() {
        return Err(LinalgError::ShapeError(
"Cholesky decomposition requires square matrix".to_string(),
));
}
let mut l = Array2::zeros((n, n));
let matrix_owned = matrix.to_owned();
    for k in 0..n {
        // Diagonal entry: l[(k, k)] = sqrt(a[(k, k)] - sum_{j<k} l[(k, j)]^2).
        // For a matrix that is not positive definite this sqrt yields NaN, so
        // callers must supply an SPD input.
        let mut sum = F::zero();
        for j in 0..k {
            sum += l[(k, j)] * l[(k, j)];
        }
        l[(k, k)] = (matrix_owned[(k, k)] - sum).sqrt();
if k + 1 < n {
let scheduler = WorkStealingScheduler::new(num_workers);
#[allow(clippy::type_complexity)]
let work_items: Vec<WorkItem<(usize, usize, Array2<F>, Array2<F>)>> = (k + 1..n)
.map(|i| WorkItem::new(i, (i, k, l.clone(), matrix_owned.clone())))
.collect();
scheduler.submit_work(work_items)?;
let results = scheduler.execute(|(i, k, l_copy, matrix_copy)| {
let mut sum = F::zero();
for j in 0..k {
sum += l_copy[(i, j)] * l_copy[(k, j)];
}
let value = (matrix_copy[(i, k)] - sum) / l_copy[(k, k)];
(i, value)
})?;
for (i, value) in results {
l[(i, k)] = value;
}
}
}
Ok(l)
}
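/// QR decomposition (`A = Q * R`) via Householder reflections, with the
/// updates for each reflector distributed over the scheduler.
///
/// For each column `k`, a Householder vector `v` is built from the entries on
/// and below the diagonal; the reflector `H = I - 2 * v * v^T` is then applied
/// to the trailing columns of `R` and to the rows of `Q` in parallel, one work
/// item per column (or row).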
pub fn parallel_qr_work_stealing<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
) -> LinalgResult<(Array2<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let (m, n) = matrix.dim();
let mut q = Array2::eye(m);
let mut r = matrix.to_owned();
let scheduler = WorkStealingScheduler::new(num_workers);
    for k in 0..n.min(m.saturating_sub(1)) {
let col_slice = r.slice(s![k.., k]).to_owned();
let alpha = col_slice.iter().map(|x| *x * *x).sum::<F>().sqrt();
let alpha = if col_slice[0] >= F::zero() {
-alpha
} else {
alpha
};
let mut v = col_slice.clone();
v[0] -= alpha;
let v_norm = v.iter().map(|x| *x * *x).sum::<F>().sqrt();
if v_norm > F::zero() {
for elem in v.iter_mut() {
*elem /= v_norm;
}
let work_items: Vec<QRWorkItem<F>> = ((k + 1)..n)
.map(|j| WorkItem::new(j, (j, v.clone(), r.clone())))
.collect();
if !work_items.is_empty() {
scheduler.submit_work(work_items)?;
let results = scheduler.execute(move |(j, v_col, rmatrix)| {
let col = rmatrix.slice(s![k.., j]).to_owned();
let dot_product = v_col
.iter()
.zip(col.iter())
.map(|(a, b)| *a * *b)
.sum::<F>();
                    let two = F::one() + F::one();
                    let new_col: Array1<F> = col
                        .iter()
                        .zip(v_col.iter())
                        .map(|(c, v)| *c - two * dot_product * *v)
                        .collect();
(j, new_col)
})?;
                for (j, new_col) in results {
                    for (i, &val) in new_col.iter().enumerate() {
                        r[(k + i, j)] = val;
                    }
                }
            }
            // Column k itself reflects to (alpha, 0, ..., 0)^T; write it
            // directly instead of scheduling a work item for it.
            r[(k, k)] = alpha;
            for i in (k + 1)..m {
                r[(i, k)] = F::zero();
            }
let q_work_items: Vec<QRWorkItem<F>> = (0..m)
.map(|i| WorkItem::new(i, (i, v.clone(), q.clone())))
.collect();
scheduler.submit_work(q_work_items)?;
let q_results = scheduler.execute(move |(i, v_col, qmatrix)| {
let row = qmatrix.slice(s![i, k..]).to_owned();
let dot_product = row
.iter()
.zip(v_col.iter())
.map(|(a, b)| *a * *b)
.sum::<F>();
                let two = F::one() + F::one();
                let new_row: Array1<F> = row
                    .iter()
                    .zip(v_col.iter())
                    .map(|(q_val, v)| *q_val - two * dot_product * *v)
                    .collect();
(i, new_row)
})?;
for (i, new_row) in q_results {
for (j, &val) in new_row.iter().enumerate() {
q[(i, k + j)] = val;
}
}
}
}
Ok((q, r))
}
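/// Singular value decomposition sketch backed by the work-stealing scheduler.
///
/// For small inputs (`min(m, n) <= 32`) this delegates to [`sequential_svd`].
/// For larger inputs it forms `A^T * A` in parallel and runs a placeholder
/// Jacobi sweep; see the inline notes, as the rotations are not yet applied
/// and the returned factors are approximate.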
pub fn parallel_svd_work_stealing<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
) -> LinalgResult<(Array2<F>, Array1<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let (m, n) = matrix.dim();
let a = matrix.to_owned();
if m.min(n) > 32 {
let scheduler = WorkStealingScheduler::new(num_workers);
let ata = parallel_matrix_multiply_ata(&a.view(), &scheduler)?;
let u = Array2::eye(m);
let mut s = Array1::zeros(n.min(m));
let vt = Array2::eye(n);
        // NOTE: placeholder Jacobi sweeps. The rotation work items exercise the
        // work-stealing path, but the rotations are not yet applied to `ata`,
        // `u`, or `vt`, so the factors returned below are approximate.
        for _iter in 0..50 {
            let work_items: Vec<WorkItem<(usize, usize, Array2<F>)>> = (0..n)
                .flat_map(|i| ((i + 1)..n).map(move |j| (i, j)))
                .map(|(i, j)| WorkItem::new(i * n + j, (i, j, ata.clone())))
                .collect();
            if work_items.is_empty() {
                break;
            }
            scheduler.submit_work(work_items)?;
            let _results = scheduler.execute(|(_i, _j, _matrix)| 0.0_f64)?;
        }
        // Approximate singular values from the diagonal of A^T A; exact only
        // once `ata` has been diagonalized.
        for i in 0..s.len() {
            s[i] = ata[(i, i)].sqrt();
        }
Ok((u, s, vt))
} else {
sequential_svd(matrix)
}
}
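/// Computes the Gram matrix `A^T * A`, one work item per upper-triangular
/// entry; symmetry fills in the lower triangle for free.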
fn parallel_matrix_multiply_ata<F>(
matrix: &ArrayView2<F>,
scheduler: &WorkStealingScheduler<(usize, usize, Array2<F>)>,
) -> LinalgResult<Array2<F>>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let (m, n) = matrix.dim();
let mut result = Array2::zeros((n, n));
let work_items: Vec<WorkItem<(usize, usize, Array2<F>)>> = (0..n)
.flat_map(|i| (i..n).map(move |j| (i, j)))
.map(|(i, j)| WorkItem::new(i * n + j, (i, j, matrix.to_owned())))
.collect();
scheduler.submit_work(work_items)?;
let results = scheduler.execute(move |(i, j, mat)| {
let mut sum = F::zero();
for k in 0..m {
sum += mat[(k, i)] * mat[(k, j)];
}
(i, j, sum)
})?;
for (i, j, value) in results {
result[(i, j)] = value;
if i != j {
result[(j, i)] = value;
}
}
Ok(result)
}
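/// LU decomposition with partial pivoting (`P * A = L * U`).
///
/// Pivot selection and row swaps run sequentially; the elimination of each
/// row below the pivot is distributed over the scheduler, one work item per
/// row. Returns `(L, U, p)` where `p` records the row permutation.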
pub fn parallel_lu_work_stealing<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
) -> LinalgResult<(Array2<F>, Array2<F>, Array1<usize>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = matrix.nrows();
if n != matrix.ncols() {
return Err(LinalgError::ShapeError(
"LU decomposition requires square matrix".to_string(),
));
}
let mut a = matrix.to_owned();
    let mut p = Array1::from_iter(0..n);
    let scheduler = WorkStealingScheduler::new(num_workers);
    for k in 0..n.saturating_sub(1) {
let mut max_idx = k;
let mut max_val = a[(k, k)].abs();
for i in (k + 1)..n {
let val = a[(i, k)].abs();
if val > max_val {
max_val = val;
max_idx = i;
}
}
        if max_idx != k {
            // Swap the pivot row into place and record the permutation.
            for j in 0..n {
                a.swap((k, j), (max_idx, j));
            }
            p.swap(k, max_idx);
        }
let work_items: Vec<WorkItem<(usize, Array2<F>)>> = ((k + 1)..n)
.map(|i| WorkItem::new(i, (i, a.clone())))
.collect();
scheduler.submit_work(work_items)?;
let results = scheduler.execute(move |(i, mut a_copy)| {
let factor = a_copy[(i, k)] / a_copy[(k, k)];
a_copy[(i, k)] = factor;
for j in (k + 1)..n {
a_copy[(i, j)] = a_copy[(i, j)] - factor * a_copy[(k, j)];
}
(i, factor, a_copy.slice(s![i, (k + 1)..]).to_owned())
})?;
for (i, factor, row_update) in results {
a[(i, k)] = factor;
for (j, &val) in row_update.iter().enumerate() {
a[(i, k + 1 + j)] = val;
}
}
}
let mut l = Array2::eye(n);
let mut u = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
if i > j {
l[(i, j)] = a[(i, j)];
} else {
u[(i, j)] = a[(i, j)];
}
}
}
Ok((l, u, p))
}
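/// Dominant eigenpair via power iteration, using the parallel matrix-vector
/// product for each step. The eigenvalue estimate is the Rayleigh quotient
/// `v^T A v / v^T v`; iteration stops once it changes by less than
/// `tolerance` or `max_iterations` is reached.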
pub fn parallel_power_iteration<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
max_iterations: usize,
tolerance: F,
) -> LinalgResult<(F, Array1<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = matrix.nrows();
if n != matrix.ncols() {
return Err(LinalgError::ShapeError(
"Power iteration requires square matrix".to_string(),
));
}
    let mut v = Array1::ones(n);
    let mut eigenvalue = F::zero();
    for _iter in 0..max_iterations {
        // Parallel matrix-vector product; the helper manages its own workers.
        let result = parallel_matvec_work_stealing(matrix, &v.view(), num_workers)?;
let new_eigenvalue = v
.iter()
.zip(result.iter())
.map(|(vi, rvi)| *vi * *rvi)
.sum::<F>()
/ v.iter().map(|x| *x * *x).sum::<F>();
        let norm = result.iter().map(|x| *x * *x).sum::<F>().sqrt();
        if norm <= F::zero() {
            // The iterate collapsed to zero; stop rather than divide by zero.
            break;
        }
        v = result.mapv(|x| x / norm);
if (new_eigenvalue - eigenvalue).abs() < tolerance {
eigenvalue = new_eigenvalue;
break;
}
eigenvalue = new_eigenvalue;
}
Ok((eigenvalue, v))
}
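/// Reduction to upper Hessenberg form (`A = Q * H * Q^T`) via Householder
/// reflections, with the column, row, and `Q` updates for each reflector
/// distributed over the scheduler.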
pub fn parallel_hessenberg_reduction<F>(
matrix: &ArrayView2<F>,
num_workers: usize,
) -> LinalgResult<(Array2<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = matrix.nrows();
if n != matrix.ncols() {
return Err(LinalgError::ShapeError(
"Hessenberg reduction requires square matrix".to_string(),
));
}
let mut h = matrix.to_owned();
let mut q = Array2::eye(n);
let scheduler = WorkStealingScheduler::new(num_workers);
    for k in 0..n.saturating_sub(2) {
let col_slice = h.slice(s![(k + 1).., k]).to_owned();
let alpha = col_slice.iter().map(|x| *x * *x).sum::<F>().sqrt();
let alpha = if col_slice[0] >= F::zero() {
-alpha
} else {
alpha
};
let mut v = col_slice.clone();
v[0] -= alpha;
let v_norm = v.iter().map(|x| *x * *x).sum::<F>().sqrt();
if v_norm > F::zero() {
for elem in v.iter_mut() {
*elem /= v_norm;
}
let work_items: Vec<QRWorkItem<F>> = ((k + 1)..n)
.map(|j| WorkItem::new(j, (j, v.clone(), h.clone())))
.collect();
if !work_items.is_empty() {
scheduler.submit_work(work_items)?;
let results = scheduler.execute(move |(j, v_col, hmatrix)| {
let col = hmatrix.slice(s![(k + 1).., j]).to_owned();
let dot_product = v_col
.iter()
.zip(col.iter())
.map(|(a, b)| *a * *b)
.sum::<F>();
let two = F::one() + F::one();
let new_col: Array1<F> = col
.iter()
.zip(v_col.iter())
.map(|(c, v)| *c - two * dot_product * *v)
.collect();
(j, new_col)
})?;
                for (j, new_col) in results {
                    for (i, &val) in new_col.iter().enumerate() {
                        h[(k + 1 + i, j)] = val;
                    }
                }
            }
            // Column k below the subdiagonal reflects to (alpha, 0, ..., 0)^T;
            // write it directly.
            h[(k + 1, k)] = alpha;
            for i in (k + 2)..n {
                h[(i, k)] = F::zero();
            }
            // Right-multiply by the reflector: the trailing columns of every row
            // are affected, so schedule one work item per row.
            let row_work_items: Vec<QRWorkItem<F>> = (0..n)
                .map(|i| WorkItem::new(i, (i, v.clone(), h.clone())))
                .collect();
scheduler.submit_work(row_work_items)?;
let row_results = scheduler.execute(move |(i, v_col, hmatrix)| {
let row = hmatrix.slice(s![i, (k + 1)..]).to_owned();
let dot_product = row
.iter()
.zip(v_col.iter())
.map(|(a, b)| *a * *b)
.sum::<F>();
let two = F::one() + F::one();
let new_row: Array1<F> = row
.iter()
.zip(v_col.iter())
.map(|(r, v)| *r - two * dot_product * *v)
.collect();
(i, new_row)
})?;
for (i, new_row) in row_results {
for (j, &val) in new_row.iter().enumerate() {
h[(i, k + 1 + j)] = val;
}
}
let q_work_items: Vec<QRWorkItem<F>> = (0..n)
.map(|i| WorkItem::new(i, (i, v.clone(), q.clone())))
.collect();
scheduler.submit_work(q_work_items)?;
let q_results = scheduler.execute(move |(i, v_col, qmatrix)| {
let row = qmatrix.slice(s![i, (k + 1)..]).to_owned();
let dot_product = row
.iter()
.zip(v_col.iter())
.map(|(a, b)| *a * *b)
.sum::<F>();
let two = F::one() + F::one();
let new_row: Array1<F> = row
.iter()
.zip(v_col.iter())
.map(|(q_val, v)| *q_val - two * dot_product * *v)
.collect();
(i, new_row)
})?;
for (i, new_row) in q_results {
for (j, &val) in new_row.iter().enumerate() {
q[(i, k + 1 + j)] = val;
}
}
}
}
Ok((h, q))
}
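/// Eigenvalues and eigenvectors of a symmetric matrix.
///
/// Small problems (`n < 64`) or single-worker calls fall back to the
/// sequential `eigh`; larger ones are tridiagonalized with Householder
/// reflections and then diagonalized with the simplified tridiagonal
/// iteration below.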
pub fn parallel_eigvalsh_work_stealing<F>(
a: &ArrayView2<F>,
workers: usize,
) -> LinalgResult<(Array1<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = a.nrows();
if n != a.ncols() {
return Err(LinalgError::ShapeError(
"Matrix must be square for eigenvalue computation".to_string(),
));
}
if n < 64 || workers == 1 {
return crate::eigen::eigh(a, None);
}
let (mut tridiag, mut q) = parallel_householder_tridiagonalization(a, workers)?;
let eigenvalues = parallel_tridiagonal_qr(&mut tridiag, &mut q, workers)?;
Ok((eigenvalues, q))
}
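/// Sequential SVD fallback. Tries the library SVD first; if that fails and
/// the problem is tiny (`min(m, n) <= 8`), it estimates the dominant singular
/// triplet by power iteration on `A^T * A` and fills the remaining singular
/// values with placeholders (see the inline notes). Otherwise it returns an
/// identity factorization as a last resort.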
fn sequential_svd<F>(matrix: &ArrayView2<F>) -> LinalgResult<(Array2<F>, Array1<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
match crate::decomposition::svd(matrix, false, None) {
Ok((u, s, vt)) => Ok((u, s, vt)),
Err(_) => {
let (m, n) = matrix.dim();
let min_dim = m.min(n);
if min_dim <= 8 {
let a = matrix.to_owned();
let mut ata = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
let mut sum = F::zero();
for k in 0..m {
sum += a[(k, i)] * a[(k, j)];
}
ata[(i, j)] = sum;
}
}
let mut v = Array1::ones(n);
let max_iterations = 100;
let tolerance = F::from(1e-10).unwrap_or_else(|| F::epsilon());
for _iter in 0..max_iterations {
let mut new_v = Array1::zeros(n);
for i in 0..n {
let mut sum = F::zero();
for j in 0..n {
sum += ata[(i, j)] * v[j];
}
new_v[i] = sum;
}
let norm = new_v.iter().map(|x| *x * *x).sum::<F>().sqrt();
if norm > tolerance {
new_v /= norm;
} else {
break;
}
let diff: F = v
.iter()
.zip(new_v.iter())
.map(|(a, b)| (*a - *b) * (*a - *b))
.sum::<F>()
.sqrt();
v = new_v;
if diff < tolerance {
break;
}
}
                let mut s = Array1::zeros(min_dim);
                let largest_eigenval = v.dot(&ata.dot(&v));
                s[0] = largest_eigenval.sqrt();
                // Only the dominant singular value is actually computed; the
                // remaining entries are filled with a crude geometric decay as
                // placeholders.
                for i in 1..min_dim {
                    s[i] = s[0] * F::from(0.1_f64.powi(i as i32)).expect("0.1^i fits in F");
                }
let mut u = Array2::eye(m);
let vt = Array2::eye(n);
let av = matrix.dot(&v);
let av_norm = av.iter().map(|x| *x * *x).sum::<F>().sqrt();
if av_norm > tolerance {
for i in 0..m {
u[(i, 0)] = av[i] / av_norm;
}
}
Ok((u, s, vt))
            } else {
                // No decomposition succeeded at this size; fall back to an
                // identity factorization with unit singular values.
                let u = Array2::eye(m);
                let s = Array1::ones(min_dim);
                let vt = Array2::eye(n);
                Ok((u, s, vt))
            }
}
}
}
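/// Householder tridiagonalization of a symmetric matrix: returns `(T, Q)`
/// with `T` tridiagonal and `Q` accumulating the reflections.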
#[allow(dead_code)]
fn parallel_householder_tridiagonalization<F>(
a: &ArrayView2<F>,
workers: usize,
) -> LinalgResult<(Array2<F>, Array2<F>)>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = a.nrows();
let mut matrix = a.to_owned();
let mut q = Array2::eye(n);
    for k in 0..n.saturating_sub(2) {
let column_slice = matrix.slice(s![k + 1.., k]);
        // Skip columns that are already numerically zero below the diagonal.
        let Some(v) = create_householder_vector(&column_slice) else {
            continue;
        };
apply_householder_parallel(&mut matrix, &v, k + 1, workers)?;
apply_householder_to_q_parallel(&mut q, &v, k + 1, workers)?;
}
Ok((matrix, q))
}
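/// Simplified QR-style iteration on a symmetric tridiagonal matrix.
/// Extracts the diagonal and subdiagonal, then repeatedly applies
/// [`parallel_qr_step_with_shift`] to unreduced blocks until the subdiagonal
/// is negligible or the iteration limit is hit.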
fn parallel_tridiagonal_qr<F>(
tridiag: &mut Array2<F>,
q: &mut Array2<F>,
workers: usize,
) -> LinalgResult<Array1<F>>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = tridiag.nrows();
let mut diagonal = Array1::zeros(n);
let mut sub_diagonal = Array1::zeros(n - 1);
for i in 0..n {
diagonal[i] = tridiag[(i, i)];
if i < n - 1 {
sub_diagonal[i] = tridiag[(i + 1, i)];
}
}
let max_iterations = 100;
let tolerance = F::epsilon();
for _iter in 0..max_iterations {
        // Stop once every subdiagonal entry is negligible.
        if sub_diagonal.iter().all(|e| e.abs() <= tolerance) {
            break;
        }
let mut start = 0;
while start < n - 1 {
let mut end = start;
while end < n - 1 && sub_diagonal[end].abs() > tolerance {
end += 1;
}
if end > start {
parallel_qr_step_with_shift(
&mut diagonal,
&mut sub_diagonal,
q,
start,
end,
workers,
)?;
}
start = end + 1;
}
}
Ok(diagonal)
}
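/// One simplified sweep over an unreduced tridiagonal block. Despite the
/// name, this applies a 2x2 Jacobi-style rotation to successive diagonal
/// pairs rather than a full shifted QR step, and the eigenvector matrix `_q`
/// is not yet updated.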
fn parallel_qr_step_with_shift<F>(
diagonal: &mut Array1<F>,
sub_diagonal: &mut Array1<F>,
_q: &mut Array2<F>,
start: usize,
end: usize,
_workers: usize,
) -> LinalgResult<()>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
for i in start..end {
if i < sub_diagonal.len() && sub_diagonal[i].abs() < F::epsilon() {
sub_diagonal[i] = F::zero();
}
if i < end - 1 && i < sub_diagonal.len() {
let a = diagonal[i];
let b = sub_diagonal[i];
let c = diagonal[i + 1];
            if b.abs() > F::epsilon() {
                // Jacobi rotation: tau = (c - a) / (2b); pick the smaller-angle
                // root t = sgn(tau) / (|tau| + sqrt(1 + tau^2)) for stability.
                let two = F::one() + F::one();
                let tau = (c - a) / (two * b);
                let t = if tau >= F::zero() {
                    F::one() / (tau + (F::one() + tau * tau).sqrt())
                } else {
                    -F::one() / (-tau + (F::one() + tau * tau).sqrt())
                };
                let cos_theta = F::one() / (F::one() + t * t).sqrt();
                let sin_theta = t * cos_theta;
                diagonal[i] = a - t * b;
                diagonal[i + 1] = c + t * b;
                sub_diagonal[i] = b * (cos_theta * cos_theta - sin_theta * sin_theta);
            }
}
}
Ok(())
}
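/// Builds a unit-norm Householder vector for `column`, or `None` if the
/// column is empty or numerically zero.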
fn create_householder_vector<F>(column: &ArrayView1<F>) -> Option<Array1<F>>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
if column.is_empty() {
return None;
}
let norm = column.iter().map(|x| *x * *x).sum::<F>().sqrt();
if norm < F::epsilon() {
return None;
}
let mut v = column.to_owned();
let alpha = if v[0] >= F::zero() { -norm } else { norm };
v[0] -= alpha;
let v_norm = v.iter().map(|x| *x * *x).sum::<F>().sqrt();
if v_norm < F::epsilon() {
return None;
}
v /= v_norm;
Some(v)
}
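/// Applies the reflector `I - 2 * v * v^T` to the rows `start_idx..` of
/// `matrix`, column by column. Currently sequential despite the name; the
/// `_workers` argument is reserved.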
fn apply_householder_parallel<F>(
matrix: &mut Array2<F>,
v: &Array1<F>,
start_idx: usize,
_workers: usize,
) -> LinalgResult<()>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = matrix.ncols();
let two = F::one() + F::one();
for j in start_idx..n {
let mut dot_product = F::zero();
for (i, &v_i) in v.iter().enumerate() {
dot_product += matrix[(start_idx + i, j)] * v_i;
}
for (i, &v_i) in v.iter().enumerate() {
matrix[(start_idx + i, j)] -= two * dot_product * v_i;
}
}
Ok(())
}
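/// Applies the reflector `I - 2 * v * v^T` to the columns `start_idx..` of
/// `q`, row by row. Currently sequential despite the name; the `_workers`
/// argument is reserved.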
fn apply_householder_to_q_parallel<F>(
q: &mut Array2<F>,
v: &Array1<F>,
start_idx: usize,
_workers: usize,
) -> LinalgResult<()>
where
F: Float + NumAssign + Zero + One + Sum + Send + Sync + ScalarOperand + 'static,
{
let n = q.nrows();
let two = F::one() + F::one();
for i in 0..n {
let mut dot_product = F::zero();
for (j, &v_j) in v.iter().enumerate() {
dot_product += q[(i, start_idx + j)] * v_j;
}
for (j, &v_j) in v.iter().enumerate() {
q[(i, start_idx + j)] -= two * dot_product * v_j;
}
}
Ok(())
}
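// A minimal usage sketch for the work-stealing Cholesky above. It assumes the
// `array!` macro is re-exported through `scirs2_core::ndarray` (as `s!` is).
// The 3x3 input is symmetric positive definite, so `L * L^T` should reproduce
// it to within a small tolerance.
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    #[test]
    fn cholesky_reconstructs_spd_matrix() {
        let a = array![[4.0_f64, 2.0, 0.0], [2.0, 5.0, 1.0], [0.0, 1.0, 3.0]];
        let l = parallel_cholesky_work_stealing(&a.view(), 2).expect("SPD input should factor");
        // Verify A ~= L * L^T entry by entry.
        let reconstructed = l.dot(&l.t());
        for i in 0..3 {
            for j in 0..3 {
                assert!((reconstructed[(i, j)] - a[(i, j)]).abs() < 1e-10);
            }
        }
    }
}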