use super::{adaptive, WorkerConfig};
use crate::error::{LinalgError, LinalgResult};
use scirs2_core::ndarray::{Array1, ArrayView1, ArrayView2};
use scirs2_core::numeric::{Float, NumAssign, One, Zero};
use scirs2_core::parallel_ops::*;
use std::iter::Sum;

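/// Parallel matrix-vector multiplication.
///
/// Falls back to the serial `ndarray` dot product when the problem size is below the
/// adaptive parallelism threshold (`adaptive::should_use_parallel`).
///
/// A minimal usage sketch (assuming `WorkerConfig` provides a `Default` implementation,
/// which is not shown in this module):
///
/// ```ignore
/// use scirs2_core::ndarray::array; // assumed re-export of ndarray's `array!` macro
///
/// let a = array![[1.0_f64, 2.0], [3.0, 4.0]];
/// let x = array![1.0_f64, 1.0];
/// let config = WorkerConfig::default(); // assumed constructor; not defined in this module
/// let y = parallel_matvec(&a.view(), &x.view(), &config).unwrap();
/// assert_eq!(y, array![3.0, 7.0]);
/// ```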
pub fn parallel_matvec<F>(
    matrix: &ArrayView2<F>,
    vector: &ArrayView1<F>,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float + Send + Sync + Zero + Sum + 'static,
{
    let (m, n) = matrix.dim();
    if n != vector.len() {
        return Err(LinalgError::ShapeError(format!(
            "Matrix-vector dimensions incompatible: {}x{} * {}",
            m,
            n,
            vector.len()
        )));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return Ok(matrix.dot(vector));
    }

    config.apply();

    let result_vec: Vec<F> = (0..m)
        .into_par_iter()
        .map(|i| {
            matrix
                .row(i)
                .iter()
                .zip(vector.iter())
                .map(|(&aij, &xj)| aij * xj)
                .sum()
        })
        .collect();

    Ok(Array1::from_vec(result_vec))
}

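/// Parallel power iteration for the dominant eigenvalue/eigenvector pair.
///
/// Returns `(eigenvalue, eigenvector)` once successive eigenvalue estimates differ by
/// less than `tolerance`. Small matrices are delegated to `crate::eigen::power_iteration`.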
pub fn parallel_power_iteration<F>(
    matrix: &ArrayView2<F>,
    max_iter: usize,
    tolerance: F,
    config: &WorkerConfig,
) -> LinalgResult<(F, Array1<F>)>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + NumAssign
        + One
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "Power iteration requires square matrix".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::eigen::power_iteration(&matrix.view(), max_iter, tolerance);
    }

    config.apply();

    // Start from a normalized all-ones vector.
    let mut v = Array1::ones(n);
    let norm = v.iter().map(|&x| x * x).sum::<F>().sqrt();
    v /= norm;

    let mut eigenvalue = F::zero();

    for _iter in 0..max_iter {
        let new_v = parallel_matvec(matrix, &v.view(), config)?;

        // Rayleigh-quotient estimate of the eigenvalue (v has unit length).
        let new_eigenvalue = new_v
            .iter()
            .zip(v.iter())
            .map(|(&new_vi, &vi)| new_vi * vi)
            .sum::<F>();

        let norm = new_v.iter().map(|&x| x * x).sum::<F>().sqrt();
        if norm < F::epsilon() {
            return Err(LinalgError::ComputationError(
                "Vector became zero during iteration".to_string(),
            ));
        }
        let normalized_v = new_v / norm;

        if (new_eigenvalue - eigenvalue).abs() < tolerance {
            return Ok((new_eigenvalue, normalized_v));
        }

        eigenvalue = new_eigenvalue;
        v = normalized_v;
    }

    Err(LinalgError::ComputationError(
        "Power iteration failed to converge".to_string(),
    ))
}

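/// Parallel implementations of basic vector operations (dot product, norm, AXPY).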
pub mod vector_ops {
    use super::*;

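    /// Parallel dot product of two vectors.
    ///
    /// A minimal sketch (assuming a `WorkerConfig::default()` constructor, not shown here):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array; // assumed re-export of ndarray's `array!` macro
    ///
    /// let x = array![1.0_f64, 2.0, 3.0];
    /// let y = array![4.0_f64, 5.0, 6.0];
    /// let d = parallel_dot(&x.view(), &y.view(), &WorkerConfig::default()).unwrap();
    /// assert_eq!(d, 32.0);
    /// ```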
    pub fn parallel_dot<F>(
        x: &ArrayView1<F>,
        y: &ArrayView1<F>,
        config: &WorkerConfig,
    ) -> LinalgResult<F>
    where
        F: Float + Send + Sync + Zero + Sum + 'static,
    {
        if x.len() != y.len() {
            return Err(LinalgError::ShapeError(
                "Vectors must have same length for dot product".to_string(),
            ));
        }

        let datasize = x.len();
        if !adaptive::should_use_parallel(datasize, config) {
            return Ok(x.iter().zip(y.iter()).map(|(&xi, &yi)| xi * yi).sum());
        }

        config.apply();

        let result = (0..x.len()).into_par_iter().map(|i| x[i] * y[i]).sum();

        Ok(result)
    }

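    /// Parallel Euclidean (L2) norm of a vector.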
    pub fn parallel_norm<F>(x: &ArrayView1<F>, config: &WorkerConfig) -> LinalgResult<F>
    where
        F: Float + Send + Sync + Zero + Sum + 'static,
    {
        let datasize = x.len();
        if !adaptive::should_use_parallel(datasize, config) {
            return Ok(x.iter().map(|&xi| xi * xi).sum::<F>().sqrt());
        }

        config.apply();

        let sum_squares = (0..x.len()).into_par_iter().map(|i| x[i] * x[i]).sum::<F>();

        Ok(sum_squares.sqrt())
    }

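    /// Parallel AXPY: computes `alpha * x + y` element-wise.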
    pub fn parallel_axpy<F>(
        alpha: F,
        x: &ArrayView1<F>,
        y: &ArrayView1<F>,
        config: &WorkerConfig,
    ) -> LinalgResult<Array1<F>>
    where
        F: Float + Send + Sync + 'static,
    {
        if x.len() != y.len() {
            return Err(LinalgError::ShapeError(
                "Vectors must have same length for AXPY".to_string(),
            ));
        }

        let datasize = x.len();
        if !adaptive::should_use_parallel(datasize, config) {
            let result: Vec<F> = x
                .iter()
                .zip(y.iter())
                .map(|(&xi, &yi)| alpha * xi + yi)
                .collect();
            return Ok(Array1::from_vec(result));
        }

        config.apply();

        let result_vec: Vec<F> = (0..x.len())
            .into_par_iter()
            .map(|i| alpha * x[i] + y[i])
            .collect();

        Ok(Array1::from_vec(result_vec))
    }
}

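/// Parallel general matrix-matrix multiplication (GEMM) with cache-blocked inner loops.
///
/// Small problems fall back to the serial `ndarray` dot product.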
pub fn parallel_gemm<F>(
    a: &ArrayView2<F>,
    b: &ArrayView2<F>,
    config: &WorkerConfig,
) -> LinalgResult<scirs2_core::ndarray::Array2<F>>
where
    F: Float + Send + Sync + Zero + Sum + NumAssign + 'static,
{
    let (m, k) = a.dim();
    let (k2, n) = b.dim();

    if k != k2 {
        return Err(LinalgError::ShapeError(format!(
            "Matrix dimensions incompatible for multiplication: {m}x{k} * {k2}x{n}"
        )));
    }

    let datasize = m * k * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return Ok(a.dot(b));
    }

    config.apply();

    let blocksize = config.chunksize;

    let mut result = scirs2_core::ndarray::Array2::zeros((m, n));

    // Rows of the result are filled in parallel; the inner product over k is
    // blocked by `blocksize` to improve cache locality.
    result
        .outer_iter_mut()
        .enumerate()
        .par_bridge()
        .for_each(|(i, mut row)| {
            for j in 0..n {
                let mut sum = F::zero();
                for kb in (0..k).step_by(blocksize) {
                    let k_end = std::cmp::min(kb + blocksize, k);
                    for ki in kb..k_end {
                        sum += a[[i, ki]] * b[[ki, j]];
                    }
                }
                row[j] = sum;
            }
        });

    Ok(result)
}

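/// Parallel QR decomposition using Householder reflections.
///
/// Returns `(Q, R)`. Small matrices are delegated to `crate::decomposition::qr`.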
pub fn parallel_qr<F>(
    matrix: &ArrayView2<F>,
    config: &WorkerConfig,
) -> LinalgResult<(
    scirs2_core::ndarray::Array2<F>,
    scirs2_core::ndarray::Array2<F>,
)>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    let datasize = m * n;

    if !adaptive::should_use_parallel(datasize, config) {
        return crate::decomposition::qr(&matrix.view(), None);
    }

    config.apply();

    let mut a = matrix.to_owned();
    let mut q = scirs2_core::ndarray::Array2::eye(m);
    let min_dim = std::cmp::min(m, n);

    for k in 0..min_dim {
        // Build the Householder vector for column k.
        let x = a.slice(scirs2_core::ndarray::s![k.., k]).to_owned();
        let alpha = if x[0] >= F::zero() {
            -x.iter().map(|&xi| xi * xi).sum::<F>().sqrt()
        } else {
            x.iter().map(|&xi| xi * xi).sum::<F>().sqrt()
        };

        if alpha.abs() < F::epsilon() {
            continue;
        }

        let mut v = x.clone();
        v[0] -= alpha;
        let v_norm_sq = v.iter().map(|&vi| vi * vi).sum::<F>();

        if v_norm_sq < F::epsilon() {
            continue;
        }

        // Apply the reflection H = I - 2*v*v^T / (v^T v) to the trailing columns of A,
        // including column k itself.
        for j in k..n {
            let col = a.slice(scirs2_core::ndarray::s![k.., j]).to_owned();
            let dot_product = v
                .iter()
                .zip(col.iter())
                .map(|(&vi, &ci)| vi * ci)
                .sum::<F>();
            let factor = F::from(2.0).unwrap() * dot_product / v_norm_sq;

            for (i, &vi) in v.iter().enumerate() {
                a[[k + i, j]] -= factor * vi;
            }
        }

        // Accumulate the reflection into Q (Q := Q * H).
        for i in 0..m {
            let row = q.slice(scirs2_core::ndarray::s![i, k..]).to_owned();
            let dot_product = v
                .iter()
                .zip(row.iter())
                .map(|(&vi, &ri)| vi * ri)
                .sum::<F>();
            let factor = F::from(2.0).unwrap() * dot_product / v_norm_sq;

            for (j, &vj) in v.iter().enumerate() {
                q[[i, k + j]] -= factor * vj;
            }
        }
    }

    let r = a.slice(scirs2_core::ndarray::s![..min_dim, ..]).to_owned();
    Ok((q, r))
}

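/// Parallel blocked Cholesky decomposition of a symmetric positive definite matrix.
///
/// Returns the lower-triangular factor `L`. Small matrices are delegated to
/// `crate::decomposition::cholesky`.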
pub fn parallel_cholesky<F>(
    matrix: &ArrayView2<F>,
    config: &WorkerConfig,
) -> LinalgResult<scirs2_core::ndarray::Array2<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "Cholesky decomposition requires square matrix".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::decomposition::cholesky(&matrix.view(), None);
    }

    config.apply();

    let mut l = scirs2_core::ndarray::Array2::zeros((n, n));
    let blocksize = config.chunksize;

    for k in (0..n).step_by(blocksize) {
        let k_end = std::cmp::min(k + blocksize, n);

        // Factor the diagonal block.
        for i in k..k_end {
            let mut sum = F::zero();
            for j in 0..i {
                sum += l[[i, j]] * l[[i, j]];
            }
            let aii = matrix[[i, i]] - sum;
            if aii <= F::zero() {
                return Err(LinalgError::ComputationError(
                    "Matrix is not positive definite".to_string(),
                ));
            }
            l[[i, i]] = aii.sqrt();

            for j in (i + 1)..k_end {
                let mut sum = F::zero();
                for p in 0..i {
                    sum += l[[j, p]] * l[[i, p]];
                }
                l[[j, i]] = (matrix[[j, i]] - sum) / l[[i, i]];
            }
        }

        // Update the panel below the diagonal block.
        if k_end < n {
            for i in k_end..n {
                for j in k..k_end {
                    let mut sum = F::zero();
                    for p in 0..j {
                        sum += l[[i, p]] * l[[j, p]];
                    }
                    l[[i, j]] = (matrix[[i, j]] - sum) / l[[j, j]];
                }
            }
        }
    }

    Ok(l)
}

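/// Parallel LU decomposition with partial pivoting.
///
/// Returns the permutation matrix `P`, the unit lower-triangular factor `L`, and the
/// upper-triangular factor `U`. Small matrices are delegated to `crate::decomposition::lu`.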
pub fn parallel_lu<F>(
    matrix: &ArrayView2<F>,
    config: &WorkerConfig,
) -> LinalgResult<(
    scirs2_core::ndarray::Array2<F>,
    scirs2_core::ndarray::Array2<F>,
    scirs2_core::ndarray::Array2<F>,
)>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    let datasize = m * n;

    if !adaptive::should_use_parallel(datasize, config) {
        return crate::decomposition::lu(&matrix.view(), None);
    }

    config.apply();

    let mut a = matrix.to_owned();
    let mut perm_vec = (0..m).collect::<Vec<_>>();
    let min_dim = std::cmp::min(m, n);

    for k in 0..min_dim {
        // Partial pivoting: find the row with the largest absolute value in column k.
        let mut max_val = F::zero();
        let mut pivot_row = k;
        for i in k..m {
            let abs_val = a[[i, k]].abs();
            if abs_val > max_val {
                max_val = abs_val;
                pivot_row = i;
            }
        }

        if max_val < F::epsilon() {
            return Err(LinalgError::ComputationError(
                "Matrix is singular".to_string(),
            ));
        }

        if pivot_row != k {
            for j in 0..n {
                let temp = a[[k, j]];
                a[[k, j]] = a[[pivot_row, j]];
                a[[pivot_row, j]] = temp;
            }
            perm_vec.swap(k, pivot_row);
        }

        // Eliminate entries below the pivot, storing the multipliers in place.
        let pivot = a[[k, k]];

        for i in (k + 1)..m {
            let multiplier = a[[i, k]] / pivot;
            a[[i, k]] = multiplier;

            for j in (k + 1)..n {
                a[[i, j]] = a[[i, j]] - multiplier * a[[k, j]];
            }
        }
    }

    // Build the permutation matrix from the row permutation vector.
    let mut p = scirs2_core::ndarray::Array2::zeros((m, m));
    for (i, &piv) in perm_vec.iter().enumerate() {
        p[[i, piv]] = F::one();
    }

    // Split the in-place factorization into explicit L (unit lower) and U (upper) factors.
    let mut l = scirs2_core::ndarray::Array2::eye(m);
    let mut u = scirs2_core::ndarray::Array2::zeros((m, n));

    for i in 0..m {
        for j in 0..n {
            if i > j && j < min_dim {
                l[[i, j]] = a[[i, j]];
            } else if i <= j {
                u[[i, j]] = a[[i, j]];
            }
        }
    }

    Ok((p, l, u))
}

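/// Parallel conjugate gradient solver for symmetric positive definite systems `A x = b`.
///
/// Small systems are delegated to `crate::iterative_solvers::conjugate_gradient`.
/// A hedged sketch of the call shape (parameter values are illustrative only):
///
/// ```ignore
/// // Solve A x = b to a tolerance of 1e-10 in at most 100 iterations.
/// let x = parallel_conjugate_gradient(&a.view(), &b.view(), 100, 1e-10, &config)?;
/// ```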
pub fn parallel_conjugate_gradient<F>(
    matrix: &ArrayView2<F>,
    b: &ArrayView1<F>,
    max_iter: usize,
    tolerance: F,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "CG requires square matrix".to_string(),
        ));
    }
    if n != b.len() {
        return Err(LinalgError::ShapeError(
            "Matrix and vector dimensions incompatible".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::iterative_solvers::conjugate_gradient(
            &matrix.view(),
            &b.view(),
            max_iter,
            tolerance,
            None,
        );
    }

    config.apply();

    // Start from x = 0, so the initial residual is b.
    let mut x = Array1::zeros(n);

    let ax = parallel_matvec(matrix, &x.view(), config)?;
    let mut r = b - &ax;
    let mut p = r.clone();
    let mut rsold = vector_ops::parallel_dot(&r.view(), &r.view(), config)?;

    for _iter in 0..max_iter {
        let ap = parallel_matvec(matrix, &p.view(), config)?;
        let alpha = rsold / vector_ops::parallel_dot(&p.view(), &ap.view(), config)?;

        // x += alpha * p; r -= alpha * A p
        x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
        r = vector_ops::parallel_axpy(-alpha, &ap.view(), &r.view(), config)?;

        let rsnew = vector_ops::parallel_dot(&r.view(), &r.view(), config)?;

        if rsnew.sqrt() < tolerance {
            return Ok(x);
        }

        // p = r + beta * p
        let beta = rsnew / rsold;
        p = vector_ops::parallel_axpy(beta, &p.view(), &r.view(), config)?;
        rsold = rsnew;
    }

    Err(LinalgError::ComputationError(
        "Conjugate gradient failed to converge".to_string(),
    ))
}

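/// Parallel SVD via an initial QR decomposition.
///
/// Computes `A = Q R`, takes the SVD of `R` (`R = U_R S V^T`), and forms `U = Q U_R`.
/// Small matrices are delegated to `crate::decomposition::svd`.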
pub fn parallel_svd<F>(
    matrix: &ArrayView2<F>,
    config: &WorkerConfig,
) -> LinalgResult<(
    scirs2_core::ndarray::Array2<F>,
    scirs2_core::ndarray::Array1<F>,
    scirs2_core::ndarray::Array2<F>,
)>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    let datasize = m * n;

    if !adaptive::should_use_parallel(datasize, config) {
        return crate::decomposition::svd(&matrix.view(), false, None);
    }

    config.apply();

    // A = Q R
    let (q, r) = parallel_qr(matrix, config)?;

    // R = U_r S V^T
    let (u_r, s, vt) = crate::decomposition::svd(&r.view(), false, None)?;

    // U = Q U_r
    let u = parallel_gemm(&q.view(), &u_r.view(), config)?;

    Ok((u, s, vt))
}

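/// Parallel restarted GMRES solver for general square systems `A x = b`.
///
/// Small systems are delegated to `crate::solvers::iterative::gmres`.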
pub fn parallel_gmres<F>(
    matrix: &ArrayView2<F>,
    b: &ArrayView1<F>,
    max_iter: usize,
    tolerance: F,
    restart: usize,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + std::fmt::Debug
        + std::fmt::Display
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "GMRES requires square matrix".to_string(),
        ));
    }
    if n != b.len() {
        return Err(LinalgError::ShapeError(
            "Matrix and vector dimensions incompatible".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        let options = crate::solvers::iterative::IterativeSolverOptions {
            max_iterations: max_iter,
            tolerance,
            verbose: false,
            restart: Some(restart),
        };
        let result = crate::solvers::iterative::gmres(matrix, b, None, &options)?;
        return Ok(result.solution);
    }

    config.apply();

    let mut x = Array1::zeros(n);
    let restart = restart.min(n);

    for _outer in 0..max_iter {
        let ax = parallel_matvec(matrix, &x.view(), config)?;
        let r = b - &ax;
        let beta = vector_ops::parallel_norm(&r.view(), config)?;

        if beta < tolerance {
            return Ok(x);
        }

        // Arnoldi process: build an orthonormal Krylov basis and the Hessenberg matrix H.
        let mut v = vec![r / beta];
        let mut h = scirs2_core::ndarray::Array2::<F>::zeros((restart + 1, restart));

        for j in 0..restart {
            let w = parallel_matvec(matrix, &v[j].view(), config)?;

            let mut w_new = w.clone();
            for i in 0..=j {
                h[[i, j]] = vector_ops::parallel_dot(&w.view(), &v[i].view(), config)?;
                w_new =
                    vector_ops::parallel_axpy(-h[[i, j]], &v[i].view(), &w_new.view(), config)?;
            }

            h[[j + 1, j]] = vector_ops::parallel_norm(&w_new.view(), config)?;

            if h[[j + 1, j]] < F::epsilon() {
                break;
            }

            v.push(w_new / h[[j + 1, j]]);
        }

        // Solve the small projected system by back substitution on the upper-triangular
        // part of H (the subdiagonal entries are ignored here), then update x along the basis.
        let k = v.len() - 1;
        let h_sub = h.slice(scirs2_core::ndarray::s![..=k, ..k]).to_owned();
        let mut g = Array1::zeros(k + 1);
        g[0] = beta;

        let mut y = Array1::zeros(k);
        for i in (0..k).rev() {
            let mut sum = g[i];
            for j in (i + 1)..k {
                sum -= h_sub[[i, j]] * y[j];
            }
            y[i] = sum / h_sub[[i, i]];
        }

        for i in 0..k {
            x = vector_ops::parallel_axpy(y[i], &v[i].view(), &x.view(), config)?;
        }

        // Check the true residual before the next restart cycle.
        let ax = parallel_matvec(matrix, &x.view(), config)?;
        let r = b - &ax;
        let residual_norm = vector_ops::parallel_norm(&r.view(), config)?;

        if residual_norm < tolerance {
            return Ok(x);
        }
    }

    Err(LinalgError::ComputationError(
        "GMRES failed to converge".to_string(),
    ))
}

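/// Parallel BiCGSTAB solver for general (non-symmetric) square systems `A x = b`.
///
/// Small systems are delegated to `crate::iterative_solvers::bicgstab`.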
pub fn parallel_bicgstab<F>(
    matrix: &ArrayView2<F>,
    b: &ArrayView1<F>,
    max_iter: usize,
    tolerance: F,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "BiCGSTAB requires square matrix".to_string(),
        ));
    }
    if n != b.len() {
        return Err(LinalgError::ShapeError(
            "Matrix and vector dimensions incompatible".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::iterative_solvers::bicgstab(
            &matrix.view(),
            &b.view(),
            max_iter,
            tolerance,
            None,
        );
    }

    config.apply();

    let mut x = Array1::zeros(n);
    let ax = parallel_matvec(matrix, &x.view(), config)?;
    let mut r = b - &ax;
    let r_hat = r.clone();

    let mut rho = F::one();
    let mut alpha = F::one();
    let mut omega = F::one();

    let mut v = Array1::zeros(n);
    let mut p = Array1::zeros(n);

    for _iter in 0..max_iter {
        let rho_new = vector_ops::parallel_dot(&r_hat.view(), &r.view(), config)?;

        if rho_new.abs() < F::epsilon() {
            return Err(LinalgError::ComputationError(
                "BiCGSTAB breakdown: rho = 0".to_string(),
            ));
        }

        let beta = (rho_new / rho) * (alpha / omega);

        // p = r + beta * (p - omega * v)
        let temp = vector_ops::parallel_axpy(-omega, &v.view(), &p.view(), config)?;
        p = vector_ops::parallel_axpy(
            F::one(),
            &r.view(),
            &vector_ops::parallel_axpy(beta, &temp.view(), &Array1::zeros(n).view(), config)?
                .view(),
            config,
        )?;

        v = parallel_matvec(matrix, &p.view(), config)?;

        alpha = rho_new / vector_ops::parallel_dot(&r_hat.view(), &v.view(), config)?;

        // s = r - alpha * v
        let s = vector_ops::parallel_axpy(-alpha, &v.view(), &r.view(), config)?;

        let s_norm = vector_ops::parallel_norm(&s.view(), config)?;
        if s_norm < tolerance {
            x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
            return Ok(x);
        }

        let t = parallel_matvec(matrix, &s.view(), config)?;

        omega = vector_ops::parallel_dot(&t.view(), &s.view(), config)?
            / vector_ops::parallel_dot(&t.view(), &t.view(), config)?;

        // x = x + alpha * p + omega * s
        x = vector_ops::parallel_axpy(alpha, &p.view(), &x.view(), config)?;
        x = vector_ops::parallel_axpy(omega, &s.view(), &x.view(), config)?;

        // r = s - omega * t
        r = vector_ops::parallel_axpy(-omega, &t.view(), &s.view(), config)?;

        let r_norm = vector_ops::parallel_norm(&r.view(), config)?;
        if r_norm < tolerance {
            return Ok(x);
        }

        rho = rho_new;
    }

    Err(LinalgError::ComputationError(
        "BiCGSTAB failed to converge".to_string(),
    ))
}

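/// Parallel Jacobi iteration for solving `A x = b`.
///
/// Small systems are delegated to `crate::iterative_solvers::jacobi_method`.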
pub fn parallel_jacobi<F>(
    matrix: &ArrayView2<F>,
    b: &ArrayView1<F>,
    max_iter: usize,
    tolerance: F,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "Jacobi method requires square matrix".to_string(),
        ));
    }
    if n != b.len() {
        return Err(LinalgError::ShapeError(
            "Matrix and vector dimensions incompatible".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::iterative_solvers::jacobi_method(
            &matrix.view(),
            &b.view(),
            max_iter,
            tolerance,
            None,
        );
    }

    config.apply();

    // Extract the diagonal, guarding against (near-)zero entries.
    let diag: Vec<F> = (0..n)
        .into_par_iter()
        .map(|i| {
            if matrix[[i, i]].abs() < F::epsilon() {
                F::one()
            } else {
                matrix[[i, i]]
            }
        })
        .collect();

    let mut x = Array1::zeros(n);

    for _iter in 0..max_iter {
        // Each component of the new iterate depends only on the previous iterate,
        // so all rows can be updated in parallel.
        let x_new_vec: Vec<F> = (0..n)
            .into_par_iter()
            .map(|i| {
                let mut sum = b[i];
                for j in 0..n {
                    if i != j {
                        sum -= matrix[[i, j]] * x[j];
                    }
                }
                sum / diag[i]
            })
            .collect();

        let x_new = Array1::from_vec(x_new_vec);

        let diff = &x_new - &x;
        let error = vector_ops::parallel_norm(&diff.view(), config)?;

        if error < tolerance {
            return Ok(x_new);
        }

        x = x_new;
    }

    Err(LinalgError::ComputationError(
        "Jacobi method failed to converge".to_string(),
    ))
}

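/// Parallel successive over-relaxation (SOR) with red-black ordering.
///
/// The relaxation parameter `omega` must lie in `(0, 2)`. Small systems are delegated
/// to `crate::iterative_solvers::successive_over_relaxation`.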
pub fn parallel_sor<F>(
    matrix: &ArrayView2<F>,
    b: &ArrayView1<F>,
    omega: F,
    max_iter: usize,
    tolerance: F,
    config: &WorkerConfig,
) -> LinalgResult<Array1<F>>
where
    F: Float
        + Send
        + Sync
        + Zero
        + Sum
        + One
        + NumAssign
        + scirs2_core::ndarray::ScalarOperand
        + 'static,
{
    let (m, n) = matrix.dim();
    if m != n {
        return Err(LinalgError::ShapeError(
            "SOR requires square matrix".to_string(),
        ));
    }
    if n != b.len() {
        return Err(LinalgError::ShapeError(
            "Matrix and vector dimensions incompatible".to_string(),
        ));
    }

    if omega <= F::zero() || omega >= F::from(2.0).unwrap() {
        return Err(LinalgError::InvalidInputError(
            "Relaxation parameter omega must be in (0, 2)".to_string(),
        ));
    }

    let datasize = m * n;
    if !adaptive::should_use_parallel(datasize, config) {
        return crate::iterative_solvers::successive_over_relaxation(
            &matrix.view(),
            &b.view(),
            omega,
            max_iter,
            tolerance,
            None,
        );
    }

    config.apply();

    let mut x = Array1::zeros(n);

    for _iter in 0..max_iter {
        let x_old = x.clone();

        // Red sweep: update even-indexed unknowns in parallel using the previous iterate.
        let red_updates: Vec<(usize, F)> = (0..n)
            .into_par_iter()
            .filter(|&i| i % 2 == 0)
            .map(|i| {
                let mut sum = b[i];
                for j in 0..n {
                    if i != j {
                        sum -= matrix[[i, j]] * x_old[j];
                    }
                }
                let x_gs = sum / matrix[[i, i]];
                let x_new = (F::one() - omega) * x_old[i] + omega * x_gs;
                (i, x_new)
            })
            .collect();

        for (i, val) in red_updates {
            x[i] = val;
        }

        // Black sweep: update odd-indexed unknowns in parallel using the freshly updated red values.
        let black_updates: Vec<(usize, F)> = (0..n)
            .into_par_iter()
            .filter(|&i| i % 2 == 1)
            .map(|i| {
                let mut sum = b[i];
                for j in 0..n {
                    if i != j {
                        sum -= matrix[[i, j]] * x[j];
                    }
                }
                let x_gs = sum / matrix[[i, i]];
                let x_new = (F::one() - omega) * x_old[i] + omega * x_gs;
                (i, x_new)
            })
            .collect();

        for (i, val) in black_updates {
            x[i] = val;
        }

        // Convergence check on the true residual.
        let ax = parallel_matvec(matrix, &x.view(), config)?;
        let r = b - &ax;
        let error = vector_ops::parallel_norm(&r.view(), config)?;

        if error < tolerance {
            return Ok(x);
        }
    }

    Err(LinalgError::ComputationError(
        "SOR failed to converge".to_string(),
    ))
}