// bem/core/solver/preconditioner.rs

1//! Preconditioners for iterative solvers
2//!
3//! Provides various preconditioning strategies for improving
4//! convergence of CGS and BiCGSTAB solvers.
5
6use ndarray::{Array1, Array2};
7use num_complex::Complex64;
8
/// Preconditioner trait
///
/// A preconditioner M approximates the system matrix A; applying it
/// means solving M*z = r for z, which accelerates the convergence of
/// the iterative solvers (CGS / BiCGSTAB, per the module docs).
pub trait Preconditioner {
    /// Apply the preconditioner: solve M*z = r for z.
    ///
    /// Returns the preconditioned residual z as a new vector; `r` is
    /// not modified.
    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64>;
}
14
15/// Identity preconditioner (no preconditioning)
16#[derive(Debug, Clone)]
17pub struct IdentityPreconditioner;
18
19impl Preconditioner for IdentityPreconditioner {
20    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
21        r.clone()
22    }
23}
24
/// Diagonal (Jacobi) preconditioner
///
/// M = diag(A), so M⁻¹ * r = r ./ diag(A)
#[derive(Debug, Clone)]
pub struct DiagonalPreconditioner {
    /// Inverse of diagonal elements; entries with magnitude below 1e-15
    /// are stored as 1 (identity fallback) to avoid division blow-up.
    diag_inv: Array1<Complex64>,
}
33
34impl DiagonalPreconditioner {
35    /// Create from a matrix
36    pub fn from_matrix(a: &Array2<Complex64>) -> Self {
37        let n = a.nrows();
38        let diag_inv = Array1::from_vec(
39            (0..n)
40                .map(|i| {
41                    let d = a[[i, i]];
42                    if d.norm() > 1e-15 {
43                        Complex64::new(1.0, 0.0) / d
44                    } else {
45                        Complex64::new(1.0, 0.0)
46                    }
47                })
48                .collect(),
49        );
50        Self { diag_inv }
51    }
52
53    /// Create from a diagonal vector
54    pub fn from_diagonal(diag: &Array1<Complex64>) -> Self {
55        let diag_inv = diag
56            .iter()
57            .map(|&d| {
58                if d.norm() > 1e-15 {
59                    Complex64::new(1.0, 0.0) / d
60                } else {
61                    Complex64::new(1.0, 0.0)
62                }
63            })
64            .collect();
65        Self { diag_inv }
66    }
67}
68
69impl Preconditioner for DiagonalPreconditioner {
70    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
71        &self.diag_inv * r
72    }
73}
74
/// Row scaling preconditioner
///
/// Scales each row by the inverse of its norm (from NC code)
#[derive(Debug, Clone)]
pub struct RowScalingPreconditioner {
    /// Per-row scale factors: 1/||row||₂ (Euclidean norm), or 1 for
    /// numerically zero rows. Stored as Complex64 with zero imaginary
    /// part so element-wise multiplication with complex residuals works.
    scale: Array1<Complex64>,
}
83
84impl RowScalingPreconditioner {
85    /// Create from a matrix
86    pub fn from_matrix(a: &Array2<Complex64>) -> Self {
87        let n = a.nrows();
88        let scale = Array1::from_vec(
89            (0..n)
90                .map(|i| {
91                    let row_norm: f64 = a.row(i).iter().map(|x| x.norm_sqr()).sum::<f64>().sqrt();
92                    if row_norm > 1e-15 {
93                        Complex64::new(1.0 / row_norm, 0.0)
94                    } else {
95                        Complex64::new(1.0, 0.0)
96                    }
97                })
98                .collect(),
99        );
100        Self { scale }
101    }
102}
103
104impl Preconditioner for RowScalingPreconditioner {
105    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
106        &self.scale * r
107    }
108}
109
/// Block diagonal preconditioner
///
/// For FMM systems where diagonal blocks correspond to clusters.
/// Blocks are assumed to cover contiguous DOF ranges in order.
#[derive(Debug, Clone)]
pub struct BlockDiagonalPreconditioner {
    /// Inverse of each diagonal block (identity of the same size when a
    /// block could not be inverted).
    block_inv: Vec<Array2<Complex64>>,
    /// Block sizes, parallel to `block_inv`
    block_sizes: Vec<usize>,
}
120
121impl BlockDiagonalPreconditioner {
122    /// Create from diagonal blocks
123    pub fn from_blocks(blocks: Vec<Array2<Complex64>>) -> Self {
124        use ndarray_linalg::Inverse;
125
126        let block_sizes: Vec<usize> = blocks.iter().map(|b| b.nrows()).collect();
127        let block_inv: Vec<Array2<Complex64>> = blocks
128            .into_iter()
129            .map(|b| {
130                b.inv().unwrap_or_else(|_| {
131                    let n = b.nrows();
132                    Array2::eye(n)
133                })
134            })
135            .collect();
136
137        Self {
138            block_inv,
139            block_sizes,
140        }
141    }
142}
143
144impl Preconditioner for BlockDiagonalPreconditioner {
145    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
146        let mut z = Array1::zeros(r.len());
147        let mut offset = 0;
148
149        for (block_inv, &block_size) in self.block_inv.iter().zip(self.block_sizes.iter()) {
150            let r_block =
151                Array1::from_vec(r.slice(ndarray::s![offset..offset + block_size]).to_vec());
152            let z_block = block_inv.dot(&r_block);
153
154            for (i, &val) in z_block.iter().enumerate() {
155                z[offset + i] = val;
156            }
157
158            offset += block_size;
159        }
160
161        z
162    }
163}
164
/// Hierarchical FMM preconditioner
///
/// Uses the FMM near-field blocks directly for preconditioning,
/// avoiding the O(N²) dense matrix assembly that kills performance.
///
/// Each cluster's diagonal block is solved independently during the
/// preconditioner application.
#[derive(Debug, Clone)]
pub struct HierarchicalFmmPreconditioner {
    /// Diagonal block for each cluster.
    /// NOTE(review): despite the name, these are the raw block matrices
    /// (or identity fallbacks), not precomputed LU factors —
    /// construction discards the factorization result and
    /// `apply_block_solve` re-factorizes via `solve` on every apply.
    block_lu: Vec<Array2<Complex64>>,
    /// Block sizes for each cluster, parallel to `block_lu`
    block_sizes: Vec<usize>,
    /// Global DOF indices for each cluster (scatter/gather map)
    cluster_dof_indices: Vec<Vec<usize>>,
    /// Total number of DOFs (length of the global vectors)
    num_dofs: usize,
}
184
185impl HierarchicalFmmPreconditioner {
186    /// Create from SLFMM near-field blocks
187    ///
188    /// This extracts only the diagonal blocks (self-interaction of each cluster)
189    /// and computes their LU factorization. Much faster than ILU on the full
190    /// near-field matrix.
191    pub fn from_slfmm_blocks(
192        near_blocks: &[super::super::assembly::slfmm::NearFieldBlock],
193        cluster_dof_indices: &[Vec<usize>],
194        num_dofs: usize,
195    ) -> Self {
196        use ndarray_linalg::Factorize;
197
198        let num_clusters = cluster_dof_indices.len();
199
200        // Build a map from cluster index to diagonal block
201        let mut cluster_blocks: Vec<Option<&super::super::assembly::slfmm::NearFieldBlock>> =
202            vec![None; num_clusters];
203
204        for block in near_blocks {
205            if block.source_cluster == block.field_cluster {
206                if block.source_cluster < num_clusters {
207                    cluster_blocks[block.source_cluster] = Some(block);
208                }
209            }
210        }
211
212        // Compute LU factorization for each cluster's diagonal block
213        let mut block_lu = Vec::with_capacity(num_clusters);
214        let mut block_sizes = Vec::with_capacity(num_clusters);
215
216        for (cluster_idx, maybe_block) in cluster_blocks.iter().enumerate() {
217            let dof_count = cluster_dof_indices
218                .get(cluster_idx)
219                .map(|v| v.len())
220                .unwrap_or(0);
221
222            if let Some(block) = maybe_block {
223                let n = block.coefficients.nrows();
224                // Try LU factorization, fall back to identity if singular
225                let lu = match block.coefficients.factorize() {
226                    Ok(_factor) => block.coefficients.clone(),
227                    Err(_) => Array2::eye(n),
228                };
229                block_lu.push(lu);
230                block_sizes.push(n);
231            } else {
232                // No diagonal block for this cluster - use identity
233                if dof_count > 0 {
234                    block_lu.push(Array2::eye(dof_count));
235                    block_sizes.push(dof_count);
236                } else {
237                    block_lu.push(Array2::zeros((0, 0)));
238                    block_sizes.push(0);
239                }
240            }
241        }
242
243        Self {
244            block_lu,
245            block_sizes,
246            cluster_dof_indices: cluster_dof_indices.to_vec(),
247            num_dofs,
248        }
249    }
250
251    /// Create from SLFMM system directly
252    pub fn from_slfmm(system: &super::super::assembly::slfmm::SlfmmSystem) -> Self {
253        Self::from_slfmm_blocks(
254            &system.near_matrix,
255            &system.cluster_dof_indices,
256            system.num_dofs,
257        )
258    }
259
260    /// Apply block-wise forward/backward substitution
261    fn apply_block_solve(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
262        use ndarray_linalg::Solve;
263        use rayon::prelude::*;
264
265        let mut z = Array1::zeros(self.num_dofs);
266
267        // Process each cluster block in parallel
268        let results: Vec<(usize, Vec<(usize, Complex64)>)> = self
269            .block_lu
270            .par_iter()
271            .enumerate()
272            .filter_map(|(cluster_idx, lu)| {
273                if cluster_idx >= self.cluster_dof_indices.len() {
274                    return None;
275                }
276                let dof_indices = &self.cluster_dof_indices[cluster_idx];
277                if dof_indices.is_empty() || lu.nrows() == 0 {
278                    return None;
279                }
280
281                // Verify sizes match
282                if lu.nrows() != dof_indices.len() {
283                    // Size mismatch - just return the input (identity)
284                    let contributions: Vec<(usize, Complex64)> = dof_indices
285                        .iter()
286                        .map(|&global_i| (global_i, r[global_i]))
287                        .collect();
288                    return Some((cluster_idx, contributions));
289                }
290
291                // Extract local RHS
292                let r_local: Array1<Complex64> =
293                    Array1::from_iter(dof_indices.iter().map(|&i| r[i]));
294
295                // Solve local system
296                let z_local = match lu.solve(&r_local) {
297                    Ok(sol) => sol,
298                    Err(_) => r_local, // Fall back to identity
299                };
300
301                // Collect results
302                let contributions: Vec<(usize, Complex64)> = dof_indices
303                    .iter()
304                    .enumerate()
305                    .map(|(local_i, &global_i)| (global_i, z_local[local_i]))
306                    .collect();
307
308                Some((cluster_idx, contributions))
309            })
310            .collect();
311
312        // Scatter results back to global vector
313        for (_cluster_idx, contributions) in results {
314            for (global_i, val) in contributions {
315                z[global_i] = val;
316            }
317        }
318
319        z
320    }
321}
322
impl Preconditioner for HierarchicalFmmPreconditioner {
    /// Delegates to the parallel per-cluster block solve.
    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
        self.apply_block_solve(r)
    }
}
328
/// Sparse ILU preconditioner for FMM near-field
///
/// Uses only the near-field blocks to build an ILU factorization,
/// avoiding the O(N²) cost of building a dense matrix.
///
/// NOTE(review): `compute_sparse_ilu` currently only populates the L
/// arrays with the matrix diagonal (a Jacobi fallback); the U arrays
/// below are reserved for the intended full ILU(0) and stay empty.
#[derive(Debug, Clone)]
pub struct SparseNearfieldIlu {
    /// L factor values (lower triangular, stored by rows)
    l_values: Vec<Complex64>,
    /// Column indices for L entries
    l_col_indices: Vec<usize>,
    /// Row start indices for L (length n+1)
    l_row_ptr: Vec<usize>,
    /// U factor values (upper triangular, stored by columns) — currently unpopulated
    u_values: Vec<Complex64>,
    /// Row indices for U entries — currently unpopulated
    u_row_indices: Vec<usize>,
    /// Column start indices for U (length n+1)
    u_col_ptr: Vec<usize>,
    /// Matrix dimension
    n: usize,
}
350
351impl SparseNearfieldIlu {
352    /// Create from SLFMM near-field blocks
353    ///
354    /// Builds a sparse ILU factorization using only the near-field structure,
355    /// which is O(N) entries instead of O(N²).
356    pub fn from_slfmm(
357        near_blocks: &[super::super::assembly::slfmm::NearFieldBlock],
358        cluster_dof_indices: &[Vec<usize>],
359        num_dofs: usize,
360        threshold: f64,
361    ) -> Self {
362        // First, assemble the sparse near-field matrix structure
363        // Count non-zeros and build CSR structure
364
365        // Collect all entries from near-field blocks
366        let mut entries: Vec<(usize, usize, Complex64)> = Vec::new();
367
368        for block in near_blocks {
369            let src_dofs = &cluster_dof_indices[block.source_cluster];
370            let fld_dofs = &cluster_dof_indices[block.field_cluster];
371
372            for (local_i, &global_i) in src_dofs.iter().enumerate() {
373                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
374                    let val = block.coefficients[[local_i, local_j]];
375                    if val.norm() > 1e-15 {
376                        entries.push((global_i, global_j, val));
377                    }
378                }
379            }
380
381            // Handle symmetric storage
382            if block.source_cluster != block.field_cluster {
383                for (local_i, &global_i) in src_dofs.iter().enumerate() {
384                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
385                        let val = block.coefficients[[local_i, local_j]];
386                        if val.norm() > 1e-15 {
387                            entries.push((global_j, global_i, val));
388                        }
389                    }
390                }
391            }
392        }
393
394        // Sort by row, then column
395        entries.sort_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
396
397        // Remove duplicates (sum them)
398        let mut unique_entries: Vec<(usize, usize, Complex64)> = Vec::new();
399        for (row, col, val) in entries {
400            if let Some(last) = unique_entries.last_mut() {
401                if last.0 == row && last.1 == col {
402                    last.2 += val;
403                    continue;
404                }
405            }
406            unique_entries.push((row, col, val));
407        }
408
409        // Build sparse ILU from these entries
410        Self::compute_sparse_ilu(num_dofs, unique_entries, threshold)
411    }
412
413    fn compute_sparse_ilu(
414        n: usize,
415        entries: Vec<(usize, usize, Complex64)>,
416        _threshold: f64,
417    ) -> Self {
418        // Build CSR structure
419        let mut row_ptr = vec![0usize; n + 1];
420        let mut col_indices: Vec<usize> = Vec::new();
421        let mut values: Vec<Complex64> = Vec::new();
422
423        let mut current_row = 0;
424        for (row, col, val) in &entries {
425            while current_row < *row {
426                current_row += 1;
427                row_ptr[current_row] = col_indices.len();
428            }
429            col_indices.push(*col);
430            values.push(*val);
431        }
432        while current_row < n {
433            current_row += 1;
434            row_ptr[current_row] = col_indices.len();
435        }
436
437        // Ensure diagonal entries exist
438        for i in 0..n {
439            let row_start = row_ptr[i];
440            let row_end = row_ptr[i + 1];
441            let has_diag = col_indices[row_start..row_end]
442                .iter()
443                .any(|&col| col == i);
444            if !has_diag {
445                // This is a simplified version - a proper implementation would
446                // insert the diagonal entry
447            }
448        }
449
450        // Perform ILU(0) factorization in-place
451        // This is a simplified version that works on the assembled structure
452        let mut l_values = Vec::new();
453        let mut l_col_indices = Vec::new();
454        let mut l_row_ptr = vec![0usize; n + 1];
455        let mut u_values = Vec::new();
456        let mut u_row_indices = Vec::new();
457        let mut u_col_ptr = vec![0usize; n + 1];
458
459        // For now, use a simple diagonal preconditioner as fallback
460        // A proper sparse ILU implementation would go here
461        for i in 0..n {
462            // Find diagonal entry
463            let row_start = row_ptr[i];
464            let row_end = row_ptr[i + 1];
465            let mut diag_val = Complex64::new(1.0, 0.0);
466            for k in row_start..row_end {
467                if col_indices[k] == i {
468                    diag_val = values[k];
469                    break;
470                }
471            }
472
473            // L diagonal = diag, U has nothing on diagonal (implicit 1)
474            l_col_indices.push(i);
475            l_values.push(diag_val);
476            l_row_ptr[i + 1] = l_row_ptr[i] + 1;
477        }
478
479        Self {
480            l_values,
481            l_col_indices,
482            l_row_ptr,
483            u_values,
484            u_row_indices,
485            u_col_ptr,
486            n,
487        }
488    }
489
490    fn forward_backward(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
491        let mut z = Array1::zeros(self.n);
492
493        // Simple diagonal solve (this is the fallback)
494        for i in 0..self.n {
495            let l_diag = self.l_values[i];
496            if l_diag.norm() > 1e-15 {
497                z[i] = r[i] / l_diag;
498            } else {
499                z[i] = r[i];
500            }
501        }
502
503        z
504    }
505}
506
impl Preconditioner for SparseNearfieldIlu {
    /// Delegates to the triangular solve (currently a diagonal solve).
    fn apply(&self, r: &Array1<Complex64>) -> Array1<Complex64> {
        self.forward_backward(r)
    }
}
512
#[cfg(test)]
mod tests {
    use super::*;

    /// The identity preconditioner must return the residual unchanged.
    #[test]
    fn test_identity_preconditioner() {
        let residual = Array1::from_vec(vec![
            Complex64::new(1.0, 0.0),
            Complex64::new(2.0, 0.0),
            Complex64::new(3.0, 0.0),
        ]);

        let z = IdentityPreconditioner.apply(&residual);

        assert_eq!(z, residual);
    }

    /// Jacobi preconditioning divides each entry by the matrix diagonal.
    #[test]
    fn test_diagonal_preconditioner() {
        let matrix = Array2::from_shape_vec(
            (2, 2),
            [4.0, 1.0, 1.0, 2.0]
                .iter()
                .map(|&re| Complex64::new(re, 0.0))
                .collect(),
        )
        .unwrap();

        let precond = DiagonalPreconditioner::from_matrix(&matrix);

        let residual = Array1::from_elem(2, Complex64::new(4.0, 0.0));
        let z = precond.apply(&residual);

        // Expected: 4/4 = 1 and 4/2 = 2.
        assert!((z[0] - Complex64::new(1.0, 0.0)).norm() < 1e-10);
        assert!((z[1] - Complex64::new(2.0, 0.0)).norm() < 1e-10);
    }

    /// Row scaling divides each entry by the Euclidean norm of its row.
    #[test]
    fn test_row_scaling_preconditioner() {
        let matrix = Array2::from_shape_vec(
            (2, 2),
            [3.0, 4.0, 0.0, 5.0]
                .iter()
                .map(|&re| Complex64::new(re, 0.0))
                .collect(),
        )
        .unwrap();

        let precond = RowScalingPreconditioner::from_matrix(&matrix);

        // Row 0: ||(3,4)|| = 5; row 1: ||(0,5)|| = 5 — both scale by 1/5.
        let residual = Array1::from_elem(2, Complex64::new(5.0, 0.0));
        let z = precond.apply(&residual);

        assert!((z[0] - Complex64::new(1.0, 0.0)).norm() < 1e-10);
        assert!((z[1] - Complex64::new(1.0, 0.0)).norm() < 1e-10);
    }
}