use super::{adaptive, WorkerConfig};
use crate::error::{LinalgError, LinalgResult};
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, NumAssign, One, Zero};
use scirs2_core::parallel_ops::*;
use std::iter::Sum;
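/// Parallel matrix-vector product `y = A * x`.
///
/// Falls back to `ndarray`'s sequential `dot` when the problem is too small
/// for parallelism to pay off (as decided by `adaptive::should_use_parallel`);
/// otherwise each row dot product is computed on the thread pool.
///
/// A minimal usage sketch (assuming `WorkerConfig` implements `Default` and
/// that `scirs2_core::ndarray` re-exports the `array!` macro, neither of
/// which is verified here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
/// let x = array![1.0, 1.0];
/// let y = parallel_matvec(&a.view(), &x.view(), &WorkerConfig::default())?;
/// assert_eq!(y, array![3.0, 7.0]);
/// ```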
pub fn parallel_matvec<F>(
matrix: &ArrayView2<F>,
vector: &ArrayView1<F>,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float + Send + Sync + Zero + Sum + 'static,
{
let (m, n) = matrix.dim();
if n != vector.len() {
return Err(LinalgError::ShapeError(format!(
"Matrix-vector dimensions incompatible: {m}x{n} * {}",
vector.len()
)));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return Ok(matrix.dot(vector));
}
config.apply();
let result_vec: Vec<F> = (0..m)
.into_par_iter()
.map(|i| {
matrix
.row(i)
.iter()
.zip(vector.iter())
.map(|(&aij, &xj)| aij * xj)
.sum()
})
.collect();
Ok(Array1::from_vec(result_vec))
}
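/// Power iteration for the dominant eigenpair of a square matrix, returning
/// `(eigenvalue, eigenvector)`.
///
/// Starting from a normalized all-ones vector, each step applies the matrix
/// via [`parallel_matvec`], estimates the eigenvalue with the Rayleigh
/// quotient `v^T A v`, and renormalizes. Iteration stops when two successive
/// estimates differ by less than `tolerance`; a vanishing iterate or
/// exhausting `max_iter` is an error. Small problems are delegated to
/// `crate::eigen::power_iteration`.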
pub fn parallel_power_iteration<F>(
matrix: &ArrayView2<F>,
max_iter: usize,
tolerance: F,
config: &WorkerConfig,
) -> LinalgResult<(F, Array1<F>)>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ NumAssign
+ One
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"Power iteration requires square matrix".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::eigen::power_iteration(&matrix.view(), max_iter, tolerance);
}
config.apply();
let mut v = Array1::ones(n);
let norm = v.iter().map(|&x| x * x).sum::<F>().sqrt();
v /= norm;
let mut eigenvalue = F::zero();
for _iter in 0..max_iter {
let new_v = parallel_matvec(matrix, &v.view(), config)?;
let new_eigenvalue = new_v
.iter()
.zip(v.iter())
.map(|(&new_vi, &vi)| new_vi * vi)
.sum::<F>();
let norm = new_v.iter().map(|&x| x * x).sum::<F>().sqrt();
if norm < F::epsilon() {
return Err(LinalgError::ComputationError(
"Vector became zero during iteration".to_string(),
));
}
let normalized_v = new_v / norm;
if (new_eigenvalue - eigenvalue).abs() < tolerance {
return Ok((new_eigenvalue, normalized_v));
}
eigenvalue = new_eigenvalue;
v = normalized_v;
}
Err(LinalgError::ComputationError(
"Power iteration failed to converge".to_string(),
))
}
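/// Parallel BLAS-1 style vector kernels (dot product, Euclidean norm, and
/// AXPY), each with a sequential fallback for small inputs.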
pub mod vector_ops {
use super::*;
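/// Dot product `x . y`, accumulated in parallel over the element index;
/// small inputs use a sequential fold.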
pub fn parallel_dot<F>(
x: &ArrayView1<F>,
y: &ArrayView1<F>,
config: &WorkerConfig,
) -> LinalgResult<F>
where
F: Float + Send + Sync + Zero + Sum + 'static,
{
if x.len() != y.len() {
return Err(LinalgError::ShapeError(
"Vectors must have same length for dot product".to_string(),
));
}
let datasize = x.len();
if !adaptive::should_use_parallel(datasize, config) {
return Ok(x.iter().zip(y.iter()).map(|(&xi, &yi)| xi * yi).sum());
}
config.apply();
let result = (0..x.len()).into_par_iter().map(|i| x[i] * y[i]).sum();
Ok(result)
}
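/// Euclidean norm `||x||_2 = sqrt(sum_i x_i^2)`, with the sum of squares
/// accumulated in parallel for large vectors.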
pub fn parallel_norm<F>(x: &ArrayView1<F>, config: &WorkerConfig) -> LinalgResult<F>
where
F: Float + Send + Sync + Zero + Sum + 'static,
{
let datasize = x.len();
if !adaptive::should_use_parallel(datasize, config) {
return Ok(x.iter().map(|&xi| xi * xi).sum::<F>().sqrt());
}
config.apply();
let sum_squares = (0..x.len()).into_par_iter().map(|i| x[i] * x[i]).sum::<F>();
Ok(sum_squares.sqrt())
}
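/// AXPY update: returns the new vector `alpha * x + y` without modifying
/// the inputs.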
pub fn parallel_axpy<F>(
alpha: F,
x: &ArrayView1<F>,
y: &ArrayView1<F>,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float + Send + Sync + 'static,
{
if x.len() != y.len() {
return Err(LinalgError::ShapeError(
"Vectors must have same length for AXPY".to_string(),
));
}
let datasize = x.len();
if !adaptive::should_use_parallel(datasize, config) {
let result = x
.iter()
.zip(y.iter())
.map(|(&xi, &yi)| alpha * xi + yi)
.collect();
return Ok(Array1::from_vec(result));
}
config.apply();
let result_vec: Vec<F> = (0..x.len())
.into_par_iter()
.map(|i| alpha * x[i] + y[i])
.collect();
Ok(Array1::from_vec(result_vec))
}
}
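/// General matrix-matrix product `C = A * B`, with rows of `C` computed in
/// parallel and the inner dimension walked in blocks of `config.chunksize`
/// for cache locality. Small products fall back to `ndarray`'s `dot`.
///
/// A minimal usage sketch (assuming `WorkerConfig` implements `Default`,
/// which is not verified here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
/// let b = array![[5.0, 6.0], [7.0, 8.0]];
/// let c = parallel_gemm(&a.view(), &b.view(), &WorkerConfig::default())?;
/// assert_eq!(c, array![[19.0, 22.0], [43.0, 50.0]]);
/// ```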
pub fn parallel_gemm<F>(
a: &ArrayView2<F>,
b: &ArrayView2<F>,
config: &WorkerConfig,
) -> LinalgResult<scirs2_core::ndarray::Array2<F>>
where
F: Float + Send + Sync + Zero + Sum + NumAssign + 'static,
{
let (m, k) = a.dim();
let (k2, n) = b.dim();
if k != k2 {
return Err(LinalgError::ShapeError(format!(
"Matrix dimensions incompatible for multiplication: {m}x{k} * {k2}x{n}"
)));
}
let datasize = m * k * n;
if !adaptive::should_use_parallel(datasize, config) {
return Ok(a.dot(b));
}
config.apply();
let blocksize = config.chunksize;
let mut result = scirs2_core::ndarray::Array2::zeros((m, n));
result
.outer_iter_mut()
.enumerate()
.par_bridge()
.for_each(|(i, mut row)| {
for j in 0..n {
let mut sum = F::zero();
for kb in (0..k).step_by(blocksize) {
let k_end = std::cmp::min(kb + blocksize, k);
for ki in kb..k_end {
sum += a[[i, ki]] * b[[ki, j]];
}
}
row[j] = sum;
}
});
Ok(result)
}
pub fn parallel_qr<F>(
matrix: &ArrayView2<F>,
config: &WorkerConfig,
) -> LinalgResult<(
scirs2_core::ndarray::Array2<F>,
scirs2_core::ndarray::Array2<F>,
)>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::decomposition::qr(&matrix.view(), None);
}
config.apply();
let mut a = matrix.to_owned();
let mut q = scirs2_core::ndarray::Array2::eye(m);
let min_dim = std::cmp::min(m, n);
for k in 0..min_dim {
let x = a.slice(scirs2_core::ndarray::s![k.., k]).to_owned();
let alpha = if x[0] >= F::zero() {
-x.iter().map(|&xi| xi * xi).sum::<F>().sqrt()
} else {
x.iter().map(|&xi| xi * xi).sum::<F>().sqrt()
};
if alpha.abs() < F::epsilon() {
continue;
}
let mut v = x.clone();
v[0] -= alpha;
let v_norm_sq = v.iter().map(|&vi| vi * vi).sum::<F>();
if v_norm_sq < F::epsilon() {
continue;
}
// Apply H = I - 2 v v^T / (v^T v) to every trailing column of A,
// including column k itself so its subdiagonal entries are annihilated
// (the previous guard skipped the final column when k == n - 1).
for j in k..n {
let col = a.slice(scirs2_core::ndarray::s![k.., j]).to_owned();
let dot_product = v
.iter()
.zip(col.iter())
.map(|(&vi, &ci)| vi * ci)
.sum::<F>();
let factor = F::from(2.0).expect("conversion from 2.0 cannot fail") * dot_product / v_norm_sq;
for (i, &vi) in v.iter().enumerate() {
a[[k + i, j]] -= factor * vi;
}
}
for i in 0..m {
let row = q.slice(scirs2_core::ndarray::s![i, k..]).to_owned();
let dot_product = v
.iter()
.zip(row.iter())
.map(|(&vi, &ri)| vi * ri)
.sum::<F>();
let factor = F::from(2.0).expect("conversion from 2.0 cannot fail") * dot_product / v_norm_sq;
for (j, &vj) in v.iter().enumerate() {
q[[i, k + j]] -= factor * vj;
}
}
}
// Return R as the full m x n upper-triangular factor so that Q * R matches
// A for tall matrices as well; sub-diagonal entries hold only round-off
// residue from the reflections, so zero them explicitly.
let mut r = a;
for i in 1..m {
for j in 0..std::cmp::min(i, n) {
r[[i, j]] = F::zero();
}
}
Ok((q, r))
}
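/// Cholesky factorization `A = L L^T` of a symmetric positive definite
/// matrix, returning the lower-triangular factor `L`.
///
/// Columns are processed in panels of `config.chunksize` for cache locality:
/// each diagonal block is factored, then the rows below it are updated by
/// forward substitution. A non-positive pivot is reported as "not positive
/// definite". Note that after the adaptive dispatch the factorization itself
/// runs sequentially; small matrices are delegated to
/// `crate::decomposition::cholesky`.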
pub fn parallel_cholesky<F>(
matrix: &ArrayView2<F>,
config: &WorkerConfig,
) -> LinalgResult<scirs2_core::ndarray::Array2<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"Cholesky decomposition requires square matrix".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::decomposition::cholesky(&matrix.view(), None);
}
config.apply();
let mut l = scirs2_core::ndarray::Array2::zeros((n, n));
let blocksize = config.chunksize;
for k in (0..n).step_by(blocksize) {
let k_end = std::cmp::min(k + blocksize, n);
for i in k..k_end {
let mut sum = F::zero();
for j in 0..i {
sum += l[[i, j]] * l[[i, j]];
}
let aii = matrix[[i, i]] - sum;
if aii <= F::zero() {
return Err(LinalgError::ComputationError(
"Matrix is not positive definite".to_string(),
));
}
l[[i, i]] = aii.sqrt();
for j in (i + 1)..k_end {
let mut sum = F::zero();
for p in 0..i {
sum += l[[j, p]] * l[[i, p]];
}
l[[j, i]] = (matrix[[j, i]] - sum) / l[[i, i]];
}
}
if k_end < n {
for i in k_end..n {
for j in k..k_end {
let mut sum = F::zero();
for p in 0..j {
sum += l[[i, p]] * l[[j, p]];
}
l[[i, j]] = (matrix[[i, j]] - sum) / l[[j, j]];
}
}
}
}
Ok(l)
}
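/// LU decomposition with partial pivoting, returning `(P, L, U)` such that
/// `P A = L U`.
///
/// At each step the largest remaining entry of the pivot column is swapped
/// into place; a pivot below machine epsilon signals a singular matrix. `L`
/// is unit lower triangular (the stored multipliers) and `U` is upper
/// triangular. As with the other direct factorizations here, the elimination
/// loop is sequential after the adaptive dispatch; small matrices are
/// delegated to `crate::decomposition::lu`.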
pub fn parallel_lu<F>(
matrix: &ArrayView2<F>,
config: &WorkerConfig,
) -> LinalgResult<(
scirs2_core::ndarray::Array2<F>,
scirs2_core::ndarray::Array2<F>,
scirs2_core::ndarray::Array2<F>,
)>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::decomposition::lu(&matrix.view(), None);
}
config.apply();
let mut a = matrix.to_owned();
let mut perm_vec = (0..m).collect::<Vec<_>>();
let min_dim = std::cmp::min(m, n);
for k in 0..min_dim {
let mut max_val = F::zero();
let mut pivot_row = k;
for i in k..m {
let abs_val = a[[i, k]].abs();
if abs_val > max_val {
max_val = abs_val;
pivot_row = i;
}
}
if max_val < F::epsilon() {
return Err(LinalgError::ComputationError(
"Matrix is singular".to_string(),
));
}
if pivot_row != k {
for j in 0..n {
let temp = a[[k, j]];
a[[k, j]] = a[[pivot_row, j]];
a[[pivot_row, j]] = temp;
}
perm_vec.swap(k, pivot_row);
}
let pivot = a[[k, k]];
for i in (k + 1)..m {
let multiplier = a[[i, k]] / pivot;
a[[i, k]] = multiplier;
for j in (k + 1)..n {
a[[i, j]] = a[[i, j]] - multiplier * a[[k, j]];
}
}
}
let mut p = scirs2_core::ndarray::Array2::zeros((m, m));
for (i, &piv) in perm_vec.iter().enumerate() {
p[[i, piv]] = F::one();
}
let mut l = scirs2_core::ndarray::Array2::eye(m);
let mut u = scirs2_core::ndarray::Array2::zeros((m, n));
for i in 0..m {
for j in 0..n {
if i > j && j < min_dim {
l[[i, j]] = a[[i, j]];
} else if i <= j {
u[[i, j]] = a[[i, j]];
}
}
}
Ok((p, l, u))
}
pub fn parallel_conjugate_gradient<F>(
matrix: &ArrayView2<F>,
b: &ArrayView1<F>,
max_iter: usize,
tolerance: F,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"CG requires square matrix".to_string(),
));
}
if n != b.len() {
return Err(LinalgError::ShapeError(
"Matrix and vector dimensions incompatible".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::iterative_solvers::conjugate_gradient(
&matrix.view(),
&b.view(),
max_iter,
tolerance,
None,
);
}
config.apply();
let mut x = Array1::zeros(n);
let ax = parallel_matvec(matrix, &x.view(), config)?;
let mut r = b - &ax;
let mut p = r.clone();
let mut rsold = vector_ops::parallel_dot(&r.view(), &r.view(), config)?;
for _iter in 0..max_iter {
let ap = parallel_matvec(matrix, &p.view(), config)?;
let alpha = rsold / vector_ops::parallel_dot(&p.view(), &ap.view(), config)?;
x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
r = vector_ops::parallel_axpy(-alpha, &ap.view(), &r.view(), config)?;
let rsnew = vector_ops::parallel_dot(&r.view(), &r.view(), config)?;
if rsnew.sqrt() < tolerance {
return Ok(x);
}
let beta = rsnew / rsold;
p = vector_ops::parallel_axpy(beta, &p.view(), &r.view(), config)?;
rsold = rsnew;
}
Err(LinalgError::ComputationError(
"Conjugate gradient failed to converge".to_string(),
))
}
pub fn parallel_svd<F>(
matrix: &ArrayView2<F>,
config: &WorkerConfig,
) -> LinalgResult<(
scirs2_core::ndarray::Array2<F>,
scirs2_core::ndarray::Array1<F>,
scirs2_core::ndarray::Array2<F>,
)>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::decomposition::svd(&matrix.view(), false, None);
}
config.apply();
let (q, r) = parallel_qr(matrix, config)?;
let (u_r, s, vt) = crate::decomposition::svd(&r.view(), false, None)?;
let u = parallel_gemm(&q.view(), &u_r.view(), config)?;
Ok((u, s, vt))
}
pub fn parallel_gmres<F>(
matrix: &ArrayView2<F>,
b: &ArrayView1<F>,
max_iter: usize,
tolerance: F,
restart: usize,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ std::fmt::Debug
+ std::fmt::Display
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"GMRES requires square matrix".to_string(),
));
}
if n != b.len() {
return Err(LinalgError::ShapeError(
"Matrix and vector dimensions incompatible".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
let options = crate::solvers::iterative::IterativeSolverOptions {
max_iterations: max_iter,
tolerance,
verbose: false,
restart: Some(restart),
};
let result = crate::solvers::iterative::gmres(matrix, b, None, &options)?;
return Ok(result.solution);
}
config.apply();
let mut x = Array1::zeros(n);
let restart = restart.min(n);
for _outer in 0..max_iter {
let ax = parallel_matvec(matrix, &x.view(), config)?;
let r = b - &ax;
let beta = vector_ops::parallel_norm(&r.view(), config)?;
if beta < tolerance {
return Ok(x);
}
let mut v = vec![r / beta];
let mut h = scirs2_core::ndarray::Array2::<F>::zeros((restart + 1, restart));
for j in 0..restart {
let w = parallel_matvec(matrix, &v[j].view(), config)?;
let mut w_new = w.clone();
for i in 0..=j {
h[[i, j]] = vector_ops::parallel_dot(&w.view(), &v[i].view(), config)?;
w_new = vector_ops::parallel_axpy(-h[[i, j]], &v[i].view(), &w_new.view(), config)?;
}
h[[j + 1, j]] = vector_ops::parallel_norm(&w_new.view(), config)?;
if h[[j + 1, j]] < F::epsilon() {
break;
}
v.push(w_new / h[[j + 1, j]]);
}
let k = v.len() - 1;
// Solve the (k+1) x k Hessenberg least-squares problem
// min ||beta * e1 - H y|| via Givens rotations; plain back substitution
// would wrongly ignore the subdiagonal of H.
let mut h_sub = h.slice(scirs2_core::ndarray::s![..=k, ..k]).to_owned();
let mut g = Array1::zeros(k + 1);
g[0] = beta;
for i in 0..k {
let denom = (h_sub[[i, i]] * h_sub[[i, i]] + h_sub[[i + 1, i]] * h_sub[[i + 1, i]]).sqrt();
if denom < F::epsilon() {
continue;
}
let (c, s) = (h_sub[[i, i]] / denom, h_sub[[i + 1, i]] / denom);
for j in i..k {
let (hij, hi1j) = (h_sub[[i, j]], h_sub[[i + 1, j]]);
h_sub[[i, j]] = c * hij + s * hi1j;
h_sub[[i + 1, j]] = -s * hij + c * hi1j;
}
let (gi, gi1) = (g[i], g[i + 1]);
g[i] = c * gi + s * gi1;
g[i + 1] = -s * gi + c * gi1;
}
// Back substitution on the now upper-triangular system.
let mut y = Array1::zeros(k);
for i in (0..k).rev() {
let mut sum = g[i];
for j in (i + 1)..k {
sum -= h_sub[[i, j]] * y[j];
}
y[i] = sum / h_sub[[i, i]];
}
// Update the iterate: x += V y.
for i in 0..k {
x = vector_ops::parallel_axpy(y[i], &v[i].view(), &x.view(), config)?;
}
let ax = parallel_matvec(matrix, &x.view(), config)?;
let r = b - &ax;
let residual_norm = vector_ops::parallel_norm(&r.view(), config)?;
if residual_norm < tolerance {
return Ok(x);
}
}
Err(LinalgError::ComputationError(
"GMRES failed to converge".to_string(),
))
}
pub fn parallel_bicgstab<F>(
matrix: &ArrayView2<F>,
b: &ArrayView1<F>,
max_iter: usize,
tolerance: F,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"BiCGSTAB requires square matrix".to_string(),
));
}
if n != b.len() {
return Err(LinalgError::ShapeError(
"Matrix and vector dimensions incompatible".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::iterative_solvers::bicgstab(
&matrix.view(),
&b.view(),
max_iter,
tolerance,
None,
);
}
config.apply();
let mut x = Array1::zeros(n);
let ax = parallel_matvec(matrix, &x.view(), config)?;
let mut r = b - &ax;
let r_hat = r.clone();
let mut rho = F::one();
let mut alpha = F::one();
let mut omega = F::one();
let mut v = Array1::zeros(n);
let mut p = Array1::zeros(n);
for _iter in 0..max_iter {
let rho_new = vector_ops::parallel_dot(&r_hat.view(), &r.view(), config)?;
if rho_new.abs() < F::epsilon() {
return Err(LinalgError::ComputationError(
"BiCGSTAB breakdown: rho = 0".to_string(),
));
}
let beta = (rho_new / rho) * (alpha / omega);
// p = r + beta * (p - omega * v), without the zero-vector intermediate.
let temp = vector_ops::parallel_axpy(-omega, &v.view(), &p.view(), config)?;
p = vector_ops::parallel_axpy(beta, &temp.view(), &r.view(), config)?;
v = parallel_matvec(matrix, &p.view(), config)?;
alpha = rho_new / vector_ops::parallel_dot(&r_hat.view(), &v.view(), config)?;
let s = vector_ops::parallel_axpy(-alpha, &v.view(), &r.view(), config)?;
let s_norm = vector_ops::parallel_norm(&s.view(), config)?;
if s_norm < tolerance {
x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
return Ok(x);
}
let t = parallel_matvec(matrix, &s.view(), config)?;
omega = vector_ops::parallel_dot(&t.view(), &s.view(), config)?
/ vector_ops::parallel_dot(&t.view(), &t.view(), config)?;
x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
x = vector_ops::parallel_axpy(omega, &s.view(), &x.view(), config)?;
r = vector_ops::parallel_axpy(-omega, &t.view(), &s.view(), config)?;
let r_norm = vector_ops::parallel_norm(&r.view(), config)?;
if r_norm < tolerance {
return Ok(x);
}
rho = rho_new;
}
Err(LinalgError::ComputationError(
"BiCGSTAB failed to converge".to_string(),
))
}
pub fn parallel_jacobi<F>(
matrix: &ArrayView2<F>,
b: &ArrayView1<F>,
max_iter: usize,
tolerance: F,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"Jacobi method requires square matrix".to_string(),
));
}
if n != b.len() {
return Err(LinalgError::ShapeError(
"Matrix and vector dimensions incompatible".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::iterative_solvers::jacobi_method(
&matrix.view(),
&b.view(),
max_iter,
tolerance,
None,
);
}
config.apply();
// Cache the diagonal; entries at or below machine epsilon are replaced by
// one to avoid division by zero (such rows make plain Jacobi inapplicable).
let diag: Vec<F> = (0..n)
.into_par_iter()
.map(|i| {
if matrix[[i, i]].abs() < F::epsilon() {
F::one()
} else {
matrix[[i, i]]
}
})
.collect();
let mut x = Array1::zeros(n);
for _iter in 0..max_iter {
let x_new_vec: Vec<F> = (0..n)
.into_par_iter()
.map(|i| {
let mut sum = b[i];
for j in 0..n {
if i != j {
sum -= matrix[[i, j]] * x[j];
}
}
sum / diag[i]
})
.collect();
let x_new = Array1::from_vec(x_new_vec);
let diff = &x_new - &x;
let error = vector_ops::parallel_norm(&diff.view(), config)?;
if error < tolerance {
return Ok(x_new);
}
x = x_new;
}
Err(LinalgError::ComputationError(
"Jacobi method failed to converge".to_string(),
))
}
pub fn parallel_sor<F>(
matrix: &ArrayView2<F>,
b: &ArrayView1<F>,
omega: F,
max_iter: usize,
tolerance: F,
config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
F: Float
+ Send
+ Sync
+ Zero
+ Sum
+ One
+ NumAssign
+ scirs2_core::ndarray::ScalarOperand
+ 'static,
{
let (m, n) = matrix.dim();
if m != n {
return Err(LinalgError::ShapeError(
"SOR requires square matrix".to_string(),
));
}
if n != b.len() {
return Err(LinalgError::ShapeError(
"Matrix and vector dimensions incompatible".to_string(),
));
}
if omega <= F::zero() || omega >= F::from(2.0).expect("conversion from 2.0 cannot fail") {
return Err(LinalgError::InvalidInputError(
"Relaxation parameter omega must be in (0, 2)".to_string(),
));
}
let datasize = m * n;
if !adaptive::should_use_parallel(datasize, config) {
return crate::iterative_solvers::successive_over_relaxation(
&matrix.view(),
&b.view(),
omega,
max_iter,
tolerance,
None,
);
}
config.apply();
let mut x = Array1::zeros(n);
for _iter in 0..max_iter {
let x_old = x.clone();
let red_updates: Vec<(usize, F)> = (0..n)
.into_par_iter()
.filter(|&i| i % 2 == 0)
.map(|i| {
let mut sum = b[i];
for j in 0..n {
if i != j {
sum -= matrix[[i, j]] * x_old[j];
}
}
let x_gs = sum / matrix[[i, i]];
let x_new = (F::one() - omega) * x_old[i] + omega * x_gs;
(i, x_new)
})
.collect();
for (i, val) in red_updates {
x[i] = val;
}
let black_updates: Vec<(usize, F)> = (0..n)
.into_par_iter()
.filter(|&i| i % 2 == 1)
.map(|i| {
let mut sum = b[i];
for j in 0..n {
if i != j {
sum -= matrix[[i, j]] * x[j];
}
}
let x_gs = sum / matrix[[i, i]];
let x_new = (F::one() - omega) * x_old[i] + omega * x_gs;
(i, x_new)
})
.collect();
for (i, val) in black_updates {
x[i] = val;
}
let ax = parallel_matvec(matrix, &x.view(), config)?;
let r = b - &ax;
let error = vector_ops::parallel_norm(&r.view(), config)?;
if error < tolerance {
return Ok(x);
}
}
Err(LinalgError::ComputationError(
"SOR failed to converge".to_string(),
))
}