Skip to main content

math_audio_solvers/preconditioners/
ilu_parallel.rs

1//! Parallel ILU preconditioners
2//!
3//! Two parallel approaches for ILU triangular solves:
4//!
5//! 1. **Graph Coloring (Level Scheduling)**: Groups rows into independent sets (levels)
6//!    that can be solved in parallel. Rows within the same level have no dependencies.
7//!
8//! 2. **Fine-Grained Fixed-Point Iteration**: Uses Jacobi-style iteration to
9//!    approximate the triangular solve. Each iteration is embarrassingly parallel.
10
11use crate::sparse::CsrMatrix;
12use crate::traits::{ComplexField, Preconditioner};
13use ndarray::Array1;
14use num_traits::FromPrimitive;
15
16#[cfg(feature = "rayon")]
17use rayon::prelude::*;
18
19// ============================================================================
20// Graph Coloring (Level Scheduling) ILU
21// ============================================================================
22
23/// Parallel ILU using Graph Coloring / Level Scheduling
24///
25/// This approach analyzes the sparsity pattern of L and U to find independent
26/// rows that can be solved in parallel. Rows are grouped into "levels" where
27/// all rows in a level depend only on rows from previous levels.
28///
29/// Good for matrices with limited fill-in and structured sparsity patterns.
30#[derive(Debug, Clone)]
31pub struct IluColoringPreconditioner<T: ComplexField> {
32    /// Lower triangular factor values
33    l_values: Vec<T>,
34    l_col_indices: Vec<usize>,
35    l_row_ptrs: Vec<usize>,
36    /// Upper triangular factor values
37    u_values: Vec<T>,
38    u_col_indices: Vec<usize>,
39    u_row_ptrs: Vec<usize>,
40    /// Diagonal of U
41    u_diag: Vec<T>,
42    /// Forward solve levels: each level contains row indices that can be solved in parallel
43    forward_levels: Vec<Vec<usize>>,
44    /// Backward solve levels: each level contains row indices that can be solved in parallel
45    backward_levels: Vec<Vec<usize>>,
46    /// Matrix dimension
47    n: usize,
48}
49
50impl<T: ComplexField> IluColoringPreconditioner<T> {
51    /// Create ILU preconditioner with graph coloring from a CSR matrix
52    pub fn from_csr(matrix: &CsrMatrix<T>) -> Self {
53        let n = matrix.num_rows;
54
55        // Perform standard ILU(0) factorization
56        let mut values = matrix.values.clone();
57        let col_indices = matrix.col_indices.clone();
58        let row_ptrs = matrix.row_ptrs.clone();
59
60        // ILU(0) factorization (same as sequential)
61        for i in 0..n {
62            for idx in row_ptrs[i]..row_ptrs[i + 1] {
63                let k = col_indices[idx];
64                if k >= i {
65                    break;
66                }
67
68                let mut u_kk = T::zero();
69                for k_idx in row_ptrs[k]..row_ptrs[k + 1] {
70                    if col_indices[k_idx] == k {
71                        u_kk = values[k_idx];
72                        break;
73                    }
74                }
75
76                if u_kk.norm() < T::Real::from_f64(1e-20).unwrap() {
77                    continue;
78                }
79
80                let l_ik = values[idx] * u_kk.inv();
81                values[idx] = l_ik;
82
83                for j_idx in row_ptrs[i]..row_ptrs[i + 1] {
84                    let j = col_indices[j_idx];
85                    if j <= k {
86                        continue;
87                    }
88
89                    for k_j_idx in row_ptrs[k]..row_ptrs[k + 1] {
90                        if col_indices[k_j_idx] == j {
91                            values[j_idx] = values[j_idx] - l_ik * values[k_j_idx];
92                            break;
93                        }
94                    }
95                }
96            }
97        }
98
99        // Extract L and U
100        let mut l_values = Vec::new();
101        let mut l_col_indices = Vec::new();
102        let mut l_row_ptrs = vec![0];
103
104        let mut u_values = Vec::new();
105        let mut u_col_indices = Vec::new();
106        let mut u_row_ptrs = vec![0];
107        let mut u_diag = vec![T::one(); n];
108
109        for i in 0..n {
110            for idx in row_ptrs[i]..row_ptrs[i + 1] {
111                let j = col_indices[idx];
112                let val = values[idx];
113
114                if j < i {
115                    l_values.push(val);
116                    l_col_indices.push(j);
117                } else {
118                    u_values.push(val);
119                    u_col_indices.push(j);
120                    if j == i {
121                        u_diag[i] = val;
122                    }
123                }
124            }
125            l_row_ptrs.push(l_values.len());
126            u_row_ptrs.push(u_values.len());
127        }
128
129        // Compute level scheduling for L (forward solve)
130        let forward_levels = compute_forward_levels(n, &l_col_indices, &l_row_ptrs);
131
132        // Compute level scheduling for U (backward solve)
133        let backward_levels = compute_backward_levels(n, &u_col_indices, &u_row_ptrs);
134
135        Self {
136            l_values,
137            l_col_indices,
138            l_row_ptrs,
139            u_values,
140            u_col_indices,
141            u_row_ptrs,
142            u_diag,
143            forward_levels,
144            backward_levels,
145            n,
146        }
147    }
148
149    /// Get statistics about the level structure
150    pub fn level_stats(&self) -> (usize, usize, f64, f64) {
151        let fwd_levels = self.forward_levels.len();
152        let bwd_levels = self.backward_levels.len();
153        let avg_fwd = self.n as f64 / fwd_levels as f64;
154        let avg_bwd = self.n as f64 / bwd_levels as f64;
155        (fwd_levels, bwd_levels, avg_fwd, avg_bwd)
156    }
157}
158
159/// Compute forward substitution levels (for lower triangular L)
160fn compute_forward_levels(
161    n: usize,
162    l_col_indices: &[usize],
163    l_row_ptrs: &[usize],
164) -> Vec<Vec<usize>> {
165    // Level of each row: level[i] = max(level[j] for all j where L[i,j] != 0) + 1
166    let mut level = vec![0usize; n];
167
168    for i in 0..n {
169        let mut max_dep_level = 0;
170        for &j in &l_col_indices[l_row_ptrs[i]..l_row_ptrs[i + 1]] {
171            max_dep_level = max_dep_level.max(level[j] + 1);
172        }
173        level[i] = max_dep_level;
174    }
175
176    // Group rows by level
177    let max_level = *level.iter().max().unwrap_or(&0);
178    let mut levels = vec![Vec::new(); max_level + 1];
179    for (i, &lvl) in level.iter().enumerate() {
180        levels[lvl].push(i);
181    }
182
183    levels
184}
185
186/// Compute backward substitution levels (for upper triangular U)
187fn compute_backward_levels(
188    n: usize,
189    u_col_indices: &[usize],
190    u_row_ptrs: &[usize],
191) -> Vec<Vec<usize>> {
192    // For backward solve, we process in reverse order
193    // Level of each row: level[i] = max(level[j] for all j > i where U[i,j] != 0) + 1
194    let mut level = vec![0usize; n];
195
196    for i in (0..n).rev() {
197        let mut max_dep_level = 0;
198        for &j in &u_col_indices[u_row_ptrs[i]..u_row_ptrs[i + 1]] {
199            if j > i {
200                max_dep_level = max_dep_level.max(level[j] + 1);
201            }
202        }
203        level[i] = max_dep_level;
204    }
205
206    // Group rows by level
207    let max_level = *level.iter().max().unwrap_or(&0);
208    let mut levels = vec![Vec::new(); max_level + 1];
209    for (i, &lvl) in level.iter().enumerate() {
210        levels[lvl].push(i);
211    }
212
213    levels
214}
215
216impl<T: ComplexField + Send + Sync> Preconditioner<T> for IluColoringPreconditioner<T> {
217    fn apply(&self, r: &Array1<T>) -> Array1<T> {
218        #[cfg(feature = "rayon")]
219        {
220            if self.n >= 246 {
221                return self.apply_parallel(r);
222            }
223        }
224        self.apply_sequential(r)
225    }
226}
227
228impl<T: ComplexField + Send + Sync> IluColoringPreconditioner<T> {
229    fn apply_sequential(&self, r: &Array1<T>) -> Array1<T> {
230        let mut y = r.clone();
231
232        // Forward substitution
233        for i in 0..self.n {
234            for idx in self.l_row_ptrs[i]..self.l_row_ptrs[i + 1] {
235                let j = self.l_col_indices[idx];
236                let l_ij = self.l_values[idx];
237                y[i] = y[i] - l_ij * y[j];
238            }
239        }
240
241        // Backward substitution
242        let mut x = y;
243        for i in (0..self.n).rev() {
244            for idx in self.u_row_ptrs[i]..self.u_row_ptrs[i + 1] {
245                let j = self.u_col_indices[idx];
246                if j > i {
247                    let u_ij = self.u_values[idx];
248                    x[i] = x[i] - u_ij * x[j];
249                }
250            }
251
252            let u_ii = self.u_diag[i];
253            if u_ii.norm() > T::Real::from_f64(1e-20).unwrap() {
254                x[i] *= u_ii.inv();
255            }
256        }
257
258        x
259    }
260
261    #[cfg(feature = "rayon")]
262    fn apply_parallel(&self, r: &Array1<T>) -> Array1<T> {
263        use std::cell::UnsafeCell;
264
265        // Wrapper to allow shared mutable access (safe because we guarantee non-overlapping writes)
266        struct UnsafeVec<U>(UnsafeCell<Vec<U>>);
267        unsafe impl<U: Send> Sync for UnsafeVec<U> {}
268
269        impl<U: Copy> UnsafeVec<U> {
270            fn get(&self, i: usize) -> U {
271                unsafe { (&(*self.0.get()))[i] }
272            }
273            fn set(&self, i: usize, val: U) {
274                unsafe { (&mut (*self.0.get()))[i] = val }
275            }
276        }
277
278        let y = UnsafeVec(UnsafeCell::new(r.to_vec()));
279
280        // Forward substitution by levels
281        for level in &self.forward_levels {
282            if level.len() > 32 {
283                // Parallel within level
284                level.par_iter().for_each(|&i| {
285                    let mut sum = T::zero();
286                    for idx in self.l_row_ptrs[i]..self.l_row_ptrs[i + 1] {
287                        let j = self.l_col_indices[idx];
288                        let l_ij = self.l_values[idx];
289                        // Safe: j < i always for lower triangular, all j in previous levels
290                        sum += l_ij * y.get(j);
291                    }
292                    // Safe: each thread writes to unique index
293                    y.set(i, y.get(i) - sum);
294                });
295            } else {
296                // Sequential for small levels
297                for &i in level {
298                    let mut sum = T::zero();
299                    for idx in self.l_row_ptrs[i]..self.l_row_ptrs[i + 1] {
300                        let j = self.l_col_indices[idx];
301                        let l_ij = self.l_values[idx];
302                        sum += l_ij * y.get(j);
303                    }
304                    y.set(i, y.get(i) - sum);
305                }
306            }
307        }
308
309        // Move y into x for backward solve
310        let x = UnsafeVec(UnsafeCell::new(y.0.into_inner()));
311
312        // Backward substitution by levels
313        for level in &self.backward_levels {
314            if level.len() > 32 {
315                let u_diag = &self.u_diag;
316                level.par_iter().for_each(|&i| {
317                    let mut sum = T::zero();
318                    for idx in self.u_row_ptrs[i]..self.u_row_ptrs[i + 1] {
319                        let j = self.u_col_indices[idx];
320                        if j > i {
321                            let u_ij = self.u_values[idx];
322                            // Safe: j > i and all j in previous levels
323                            sum += u_ij * x.get(j);
324                        }
325                    }
326                    let xi = x.get(i) - sum;
327                    let u_ii = u_diag[i];
328                    if u_ii.norm() > T::Real::from_f64(1e-20).unwrap() {
329                        x.set(i, xi * u_ii.inv());
330                    } else {
331                        x.set(i, xi);
332                    }
333                });
334            } else {
335                for &i in level {
336                    let mut sum = T::zero();
337                    for idx in self.u_row_ptrs[i]..self.u_row_ptrs[i + 1] {
338                        let j = self.u_col_indices[idx];
339                        if j > i {
340                            let u_ij = self.u_values[idx];
341                            sum += u_ij * x.get(j);
342                        }
343                    }
344                    let xi = x.get(i) - sum;
345                    let u_ii = self.u_diag[i];
346                    if u_ii.norm() > T::Real::from_f64(1e-20).unwrap() {
347                        x.set(i, xi * u_ii.inv());
348                    } else {
349                        x.set(i, xi);
350                    }
351                }
352            }
353        }
354
355        Array1::from_vec(x.0.into_inner())
356    }
357}
358
359// ============================================================================
360// Fine-Grained Fixed-Point Iteration ILU
361// ============================================================================
362
363/// Parallel ILU using Fine-Grained Fixed-Point Iteration
364///
365/// Instead of exact triangular solves, this approach uses Jacobi-style
366/// fixed-point iteration: x^{k+1} = D^{-1}(b - (L+U)x^k)
367///
368/// Each iteration is embarrassingly parallel. Typically 2-5 iterations
369/// are enough for a good approximation.
370///
371/// This trades exact solves for parallelism and can be faster on
372/// highly parallel hardware.
373#[derive(Debug, Clone)]
374pub struct IluFixedPointPreconditioner<T: ComplexField> {
375    /// Lower triangular factor values
376    l_values: Vec<T>,
377    l_col_indices: Vec<usize>,
378    l_row_ptrs: Vec<usize>,
379    /// Upper triangular factor values (excluding diagonal)
380    u_off_values: Vec<T>,
381    u_off_col_indices: Vec<usize>,
382    u_off_row_ptrs: Vec<usize>,
383    /// Inverse diagonal of U (for fast multiplication)
384    u_diag_inv: Vec<T>,
385    /// Number of fixed-point iterations
386    iterations: usize,
387    /// Matrix dimension
388    n: usize,
389}
390
391impl<T: ComplexField> IluFixedPointPreconditioner<T> {
392    /// Create ILU preconditioner with fixed-point iteration
393    ///
394    /// # Arguments
395    /// * `matrix` - The sparse matrix to precondition
396    /// * `iterations` - Number of fixed-point iterations (typically 2-5)
397    pub fn from_csr(matrix: &CsrMatrix<T>, iterations: usize) -> Self {
398        let n = matrix.num_rows;
399
400        // Perform standard ILU(0) factorization
401        let mut values = matrix.values.clone();
402        let col_indices = matrix.col_indices.clone();
403        let row_ptrs = matrix.row_ptrs.clone();
404
405        for i in 0..n {
406            for idx in row_ptrs[i]..row_ptrs[i + 1] {
407                let k = col_indices[idx];
408                if k >= i {
409                    break;
410                }
411
412                let mut u_kk = T::zero();
413                for k_idx in row_ptrs[k]..row_ptrs[k + 1] {
414                    if col_indices[k_idx] == k {
415                        u_kk = values[k_idx];
416                        break;
417                    }
418                }
419
420                if u_kk.norm() < T::Real::from_f64(1e-20).unwrap() {
421                    continue;
422                }
423
424                let l_ik = values[idx] * u_kk.inv();
425                values[idx] = l_ik;
426
427                for j_idx in row_ptrs[i]..row_ptrs[i + 1] {
428                    let j = col_indices[j_idx];
429                    if j <= k {
430                        continue;
431                    }
432
433                    for k_j_idx in row_ptrs[k]..row_ptrs[k + 1] {
434                        if col_indices[k_j_idx] == j {
435                            values[j_idx] = values[j_idx] - l_ik * values[k_j_idx];
436                            break;
437                        }
438                    }
439                }
440            }
441        }
442
443        // Extract L, U (off-diagonal), and D^{-1}
444        let mut l_values = Vec::new();
445        let mut l_col_indices = Vec::new();
446        let mut l_row_ptrs = vec![0];
447
448        let mut u_off_values = Vec::new();
449        let mut u_off_col_indices = Vec::new();
450        let mut u_off_row_ptrs = vec![0];
451
452        let mut u_diag_inv = vec![T::one(); n];
453
454        for i in 0..n {
455            for idx in row_ptrs[i]..row_ptrs[i + 1] {
456                let j = col_indices[idx];
457                let val = values[idx];
458
459                if j < i {
460                    // Lower triangular (L)
461                    l_values.push(val);
462                    l_col_indices.push(j);
463                } else if j == i {
464                    // Diagonal
465                    if val.norm() > T::Real::from_f64(1e-20).unwrap() {
466                        u_diag_inv[i] = val.inv();
467                    }
468                } else {
469                    // Upper triangular off-diagonal
470                    u_off_values.push(val);
471                    u_off_col_indices.push(j);
472                }
473            }
474            l_row_ptrs.push(l_values.len());
475            u_off_row_ptrs.push(u_off_values.len());
476        }
477
478        Self {
479            l_values,
480            l_col_indices,
481            l_row_ptrs,
482            u_off_values,
483            u_off_col_indices,
484            u_off_row_ptrs,
485            u_diag_inv,
486            iterations,
487            n,
488        }
489    }
490
491    /// Create with default 3 iterations
492    pub fn from_csr_default(matrix: &CsrMatrix<T>) -> Self {
493        Self::from_csr(matrix, 3)
494    }
495}
496
497impl<T: ComplexField + Send + Sync> Preconditioner<T> for IluFixedPointPreconditioner<T> {
498    fn apply(&self, r: &Array1<T>) -> Array1<T> {
499        #[cfg(feature = "rayon")]
500        {
501            if self.n >= 246 {
502                return self.apply_parallel(r);
503            }
504        }
505        self.apply_sequential(r)
506    }
507}
508
509impl<T: ComplexField + Send + Sync> IluFixedPointPreconditioner<T> {
510    fn apply_sequential(&self, r: &Array1<T>) -> Array1<T> {
511        // Solve (L+D+U)x = r using fixed-point iteration
512        // Split: (D)(I + D^{-1}(L+U))x = r
513        // Iteration: x^{k+1} = D^{-1}(r - (L+U)x^k)
514
515        // Initial guess: x = D^{-1} r
516        let mut x: Vec<T> = r
517            .iter()
518            .zip(self.u_diag_inv.iter())
519            .map(|(&ri, &di)| ri * di)
520            .collect();
521
522        for _ in 0..self.iterations {
523            let mut x_new = vec![T::zero(); self.n];
524
525            for i in 0..self.n {
526                let mut sum = r[i];
527
528                // Subtract L*x contribution
529                for idx in self.l_row_ptrs[i]..self.l_row_ptrs[i + 1] {
530                    let j = self.l_col_indices[idx];
531                    sum -= self.l_values[idx] * x[j];
532                }
533
534                // Subtract U_off*x contribution
535                for idx in self.u_off_row_ptrs[i]..self.u_off_row_ptrs[i + 1] {
536                    let j = self.u_off_col_indices[idx];
537                    sum -= self.u_off_values[idx] * x[j];
538                }
539
540                x_new[i] = sum * self.u_diag_inv[i];
541            }
542
543            x = x_new;
544        }
545
546        Array1::from_vec(x)
547    }
548
549    #[cfg(feature = "rayon")]
550    fn apply_parallel(&self, r: &Array1<T>) -> Array1<T> {
551        // Initial guess: x = D^{-1} r
552        let mut x: Vec<T> = r
553            .iter()
554            .zip(self.u_diag_inv.iter())
555            .map(|(&ri, &di)| ri * di)
556            .collect();
557
558        let r_slice = r.as_slice().expect("Array should be contiguous");
559
560        for _ in 0..self.iterations {
561            // Each row can be computed independently
562            let x_new: Vec<T> = (0..self.n)
563                .into_par_iter()
564                .map(|i| {
565                    let mut sum = r_slice[i];
566
567                    // Subtract L*x contribution
568                    for idx in self.l_row_ptrs[i]..self.l_row_ptrs[i + 1] {
569                        let j = self.l_col_indices[idx];
570                        sum -= self.l_values[idx] * x[j];
571                    }
572
573                    // Subtract U_off*x contribution
574                    for idx in self.u_off_row_ptrs[i]..self.u_off_row_ptrs[i + 1] {
575                        let j = self.u_off_col_indices[idx];
576                        sum -= self.u_off_values[idx] * x[j];
577                    }
578
579                    sum * self.u_diag_inv[i]
580                })
581                .collect();
582
583            x = x_new;
584        }
585
586        Array1::from_vec(x)
587    }
588}
589
590#[cfg(test)]
591mod tests {
592    use super::*;
593    use crate::iterative::{GmresConfig, gmres_preconditioned};
594    use num_complex::Complex64;
595
596    fn create_test_matrix() -> CsrMatrix<Complex64> {
597        let n = 10;
598        let mut dense = ndarray::Array2::from_elem((n, n), Complex64::new(0.0, 0.0));
599
600        for i in 0..n {
601            dense[[i, i]] = Complex64::new(4.0, 0.0);
602            if i > 0 {
603                dense[[i, i - 1]] = Complex64::new(-1.0, 0.0);
604            }
605            if i < n - 1 {
606                dense[[i, i + 1]] = Complex64::new(-1.0, 0.0);
607            }
608        }
609
610        CsrMatrix::from_dense(&dense, 1e-15)
611    }
612
613    #[test]
614    fn test_ilu_coloring_basic() {
615        let matrix = create_test_matrix();
616        let precond = IluColoringPreconditioner::from_csr(&matrix);
617
618        let r = Array1::from_iter((0..10).map(|i| Complex64::new((i as f64).sin(), 0.0)));
619        let result = precond.apply(&r);
620
621        // Should produce a reasonable result
622        assert_eq!(result.len(), 10);
623        assert!(result.iter().all(|x| x.norm() < 100.0));
624    }
625
626    #[test]
627    fn test_ilu_fixedpoint_basic() {
628        let matrix = create_test_matrix();
629        let precond = IluFixedPointPreconditioner::from_csr(&matrix, 3);
630
631        let r = Array1::from_iter((0..10).map(|i| Complex64::new((i as f64).sin(), 0.0)));
632        let result = precond.apply(&r);
633
634        assert_eq!(result.len(), 10);
635        assert!(result.iter().all(|x| x.norm() < 100.0));
636    }
637
638    #[test]
639    fn test_ilu_coloring_with_gmres() {
640        let matrix = create_test_matrix();
641        let precond = IluColoringPreconditioner::from_csr(&matrix);
642
643        let b = Array1::from_iter((0..10).map(|i| Complex64::new((i as f64).sin(), 0.0)));
644
645        let config = GmresConfig {
646            max_iterations: 50,
647            restart: 10,
648            tolerance: 1e-10,
649            print_interval: 0,
650        };
651
652        let sol = gmres_preconditioned(&matrix, &precond, &b, &config);
653        assert!(sol.converged, "GMRES with ILU-coloring should converge");
654    }
655
656    #[test]
657    fn test_ilu_fixedpoint_with_gmres() {
658        let matrix = create_test_matrix();
659        let precond = IluFixedPointPreconditioner::from_csr(&matrix, 3);
660
661        let b = Array1::from_iter((0..10).map(|i| Complex64::new((i as f64).sin(), 0.0)));
662
663        let config = GmresConfig {
664            max_iterations: 50,
665            restart: 10,
666            tolerance: 1e-10,
667            print_interval: 0,
668        };
669
670        let sol = gmres_preconditioned(&matrix, &precond, &b, &config);
671        assert!(sol.converged, "GMRES with ILU-fixedpoint should converge");
672    }
673
674    #[test]
675    fn test_level_stats() {
676        let matrix = create_test_matrix();
677        let precond = IluColoringPreconditioner::from_csr(&matrix);
678
679        let (fwd, bwd, avg_fwd, avg_bwd) = precond.level_stats();
680
681        // Tridiagonal matrix should have limited parallelism
682        assert!(fwd > 0);
683        assert!(bwd > 0);
684        assert!(avg_fwd > 0.0);
685        assert!(avg_bwd > 0.0);
686    }
687}