math_audio_solvers/preconditioners/
amg.rs

1//! Algebraic Multigrid (AMG) Preconditioner
2//!
3//! This module implements an algebraic multigrid preconditioner inspired by
4//! hypre's BoomerAMG, designed for better parallel scalability across CPU cores.
5//!
6//! ## Features
7//!
8//! - **Parallel coarsening**: Classical Ruge-Stüben (RS) and PMIS algorithms
9//! - **Interpolation**: Standard and extended interpolation operators
10//! - **Smoothers**: Jacobi (fully parallel) and symmetric Gauss-Seidel
11//! - **V-cycle**: Standard V(ν₁, ν₂) cycling with configurable pre/post smoothing
12//!
13//! ## Scalability
14//!
15//! The AMG preconditioner scales better than ILU across multiple cores because:
16//! - Coarsening can be parallelized (PMIS is inherently parallel)
17//! - Jacobi smoothing is embarrassingly parallel
18//! - Each level's operations can be parallelized independently
19//!
20//! ## Usage
21//!
22//! ```ignore
23//! use math_audio_solvers::{AmgPreconditioner, AmgConfig, CsrMatrix};
24//!
25//! let config = AmgConfig::default();
26//! let precond = AmgPreconditioner::from_csr(&matrix, config);
27//!
28//! // Use with GMRES
29//! let z = precond.apply(&residual);
30//! ```
31
32#[cfg(any(feature = "native", feature = "wasm"))]
33use crate::parallel::{parallel_enumerate_map, parallel_map_indexed};
34use crate::sparse::CsrMatrix;
35use crate::traits::{ComplexField, Preconditioner};
36use ndarray::Array1;
37use num_traits::FromPrimitive;
38
39/// Coarsening algorithm for AMG hierarchy construction
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
41pub enum AmgCoarsening {
42    /// Classical Ruge-Stüben coarsening
43    /// Good quality but limited parallelism in the selection phase
44    #[default]
45    RugeStuben,
46
47    /// Parallel Modified Independent Set (PMIS)
48    /// Better parallel scalability, may produce slightly larger coarse grids
49    Pmis,
50
51    /// Hybrid MIS (HMIS) - PMIS in first pass, then RS cleanup
52    /// Balance between quality and parallelism
53    Hmis,
54}
55
56/// Interpolation operator type
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
58pub enum AmgInterpolation {
59    /// Standard interpolation - direct interpolation from coarse neighbors
60    #[default]
61    Standard,
62
63    /// Extended interpolation - includes indirect (distance-2) connections
64    /// Better for some problem types but more expensive
65    Extended,
66
67    /// Direct interpolation - simplest, only immediate strong connections
68    /// Fastest but may have poor convergence for hard problems
69    Direct,
70}
71
72/// Smoother type for AMG relaxation
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
74pub enum AmgSmoother {
75    /// Jacobi relaxation - fully parallel, requires damping (ω ≈ 0.6-0.8)
76    #[default]
77    Jacobi,
78
79    /// l1-Jacobi - Jacobi with l1 norm scaling, more robust
80    L1Jacobi,
81
82    /// Symmetric Gauss-Seidel - forward then backward sweep
83    /// Better convergence but limited parallelism
84    SymmetricGaussSeidel,
85
86    /// Chebyshev polynomial smoother - fully parallel, no damping needed
87    Chebyshev,
88}
89
90/// AMG cycle type
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
92pub enum AmgCycle {
93    /// V-cycle: one visit to each level
94    #[default]
95    VCycle,
96
97    /// W-cycle: two visits to coarser levels (more expensive)
98    WCycle,
99
100    /// F-cycle: hybrid between V and W
101    FCycle,
102}
103
104/// Configuration for AMG preconditioner
105#[derive(Debug, Clone)]
106pub struct AmgConfig {
107    /// Coarsening algorithm
108    pub coarsening: AmgCoarsening,
109
110    /// Interpolation operator type
111    pub interpolation: AmgInterpolation,
112
113    /// Smoother for pre- and post-relaxation
114    pub smoother: AmgSmoother,
115
116    /// Cycle type (V, W, or F)
117    pub cycle: AmgCycle,
118
119    /// Strong connection threshold (default: 0.25)
120    /// Connections with |a_ij| >= θ * max_k |a_ik| are considered strong
121    pub strong_threshold: f64,
122
123    /// Maximum number of levels in the hierarchy
124    pub max_levels: usize,
125
126    /// Coarsest level size - switch to direct solve below this
127    pub coarse_size: usize,
128
129    /// Number of pre-smoothing sweeps (ν₁)
130    pub num_pre_smooth: usize,
131
132    /// Number of post-smoothing sweeps (ν₂)
133    pub num_post_smooth: usize,
134
135    /// Jacobi damping parameter (ω)
136    pub jacobi_weight: f64,
137
138    /// Truncation factor for interpolation (drop small weights)
139    pub trunc_factor: f64,
140
141    /// Maximum interpolation stencil size per row
142    pub max_interp_elements: usize,
143
144    /// Enable aggressive coarsening on first few levels
145    pub aggressive_coarsening_levels: usize,
146}
147
148impl Default for AmgConfig {
149    fn default() -> Self {
150        Self {
151            coarsening: AmgCoarsening::default(),
152            interpolation: AmgInterpolation::default(),
153            smoother: AmgSmoother::default(),
154            cycle: AmgCycle::default(),
155            strong_threshold: 0.25,
156            max_levels: 25,
157            coarse_size: 50,
158            num_pre_smooth: 1,
159            num_post_smooth: 1,
160            jacobi_weight: 0.6667, // 2/3 is optimal for Poisson
161            trunc_factor: 0.0,
162            max_interp_elements: 4,
163            aggressive_coarsening_levels: 0,
164        }
165    }
166}
167
168impl AmgConfig {
169    /// Configuration optimized for BEM systems
170    ///
171    /// BEM matrices are typically denser and less sparse than FEM,
172    /// requiring adjusted thresholds.
173    pub fn for_bem() -> Self {
174        Self {
175            strong_threshold: 0.5,           // Higher for denser BEM matrices
176            coarsening: AmgCoarsening::Pmis, // Better parallel scalability
177            smoother: AmgSmoother::L1Jacobi, // More robust for BEM
178            max_interp_elements: 6,
179            ..Default::default()
180        }
181    }
182
183    /// Configuration optimized for FEM systems
184    pub fn for_fem() -> Self {
185        Self {
186            strong_threshold: 0.25,
187            coarsening: AmgCoarsening::RugeStuben,
188            smoother: AmgSmoother::SymmetricGaussSeidel,
189            ..Default::default()
190        }
191    }
192
193    /// Configuration optimized for maximum parallel scalability
194    pub fn for_parallel() -> Self {
195        Self {
196            coarsening: AmgCoarsening::Pmis,
197            smoother: AmgSmoother::Jacobi,
198            jacobi_weight: 0.8,
199            num_pre_smooth: 2,
200            num_post_smooth: 2,
201            ..Default::default()
202        }
203    }
204
205    /// Configuration for difficult/ill-conditioned problems
206    pub fn for_difficult_problems() -> Self {
207        Self {
208            coarsening: AmgCoarsening::RugeStuben,
209            interpolation: AmgInterpolation::Extended,
210            smoother: AmgSmoother::SymmetricGaussSeidel,
211            strong_threshold: 0.25,
212            max_interp_elements: 8,
213            num_pre_smooth: 2,
214            num_post_smooth: 2,
215            ..Default::default()
216        }
217    }
218}
219
220/// Point classification in coarsening
221#[derive(Debug, Clone, Copy, PartialEq, Eq)]
222enum PointType {
223    /// Undecided
224    Undecided,
225    /// Coarse point (C-point)
226    Coarse,
227    /// Fine point (F-point)
228    Fine,
229}
230
231/// Single level in the AMG hierarchy
232#[derive(Debug, Clone)]
233struct AmgLevel<T: ComplexField> {
234    /// System matrix A at this level (CSR format)
235    matrix: CsrMatrix<T>,
236
237    /// Prolongation operator P: coarse -> fine
238    prolongation: Option<CsrMatrix<T>>,
239
240    /// Restriction operator R: fine -> coarse (typically R = P^T)
241    restriction: Option<CsrMatrix<T>>,
242
243    /// Inverse diagonal for Jacobi smoothing
244    diag_inv: Array1<T>,
245
246    /// Coarse-to-fine mapping
247    coarse_to_fine: Vec<usize>,
248
249    /// Number of DOFs at this level
250    num_dofs: usize,
251}
252
253/// Algebraic Multigrid Preconditioner
254///
255/// Implements a classical AMG V-cycle preconditioner with configurable
256/// coarsening, interpolation, and smoothing strategies.
257#[derive(Debug, Clone)]
258pub struct AmgPreconditioner<T: ComplexField> {
259    /// AMG hierarchy (finest to coarsest)
260    levels: Vec<AmgLevel<T>>,
261
262    /// Configuration
263    config: AmgConfig,
264
265    /// Statistics
266    setup_time_ms: f64,
267    grid_complexity: f64,
268    operator_complexity: f64,
269}
270
271impl<T: ComplexField> AmgPreconditioner<T>
272where
273    T::Real: Sync + Send,
274{
275    /// Create AMG preconditioner from a CSR matrix
276    pub fn from_csr(matrix: &CsrMatrix<T>, config: AmgConfig) -> Self {
277        let start = std::time::Instant::now();
278
279        let mut levels = Vec::new();
280        let mut current_matrix = matrix.clone();
281
282        // Extract diagonal for first level
283        let diag_inv = Self::compute_diag_inv(&current_matrix);
284
285        levels.push(AmgLevel {
286            matrix: current_matrix.clone(),
287            prolongation: None,
288            restriction: None,
289            diag_inv,
290            coarse_to_fine: Vec::new(),
291            num_dofs: current_matrix.num_rows,
292        });
293
294        // Build hierarchy
295        for _level_idx in 0..config.max_levels - 1 {
296            let n = current_matrix.num_rows;
297            if n <= config.coarse_size {
298                break;
299            }
300
301            // Compute strength matrix
302            let strong_connections =
303                Self::compute_strength_matrix(&current_matrix, config.strong_threshold);
304
305            // Coarsening
306            let (point_types, coarse_to_fine) = match config.coarsening {
307                AmgCoarsening::RugeStuben => {
308                    Self::coarsen_ruge_stuben(&current_matrix, &strong_connections)
309                }
310                AmgCoarsening::Pmis | AmgCoarsening::Hmis => {
311                    Self::coarsen_pmis(&current_matrix, &strong_connections)
312                }
313            };
314
315            let num_coarse = coarse_to_fine.len();
316            if num_coarse == 0 || num_coarse >= n {
317                // Can't coarsen further
318                break;
319            }
320
321            // Build interpolation operator
322            let prolongation = Self::build_interpolation(
323                &current_matrix,
324                &strong_connections,
325                &point_types,
326                &coarse_to_fine,
327                &config,
328            );
329
330            // Restriction is transpose of prolongation
331            let restriction = Self::transpose_csr(&prolongation);
332
333            // Galerkin coarse grid: A_c = R * A * P
334            let coarse_matrix =
335                Self::galerkin_product(&restriction, &current_matrix, &prolongation);
336
337            // Extract diagonal for new level
338            let coarse_diag_inv = Self::compute_diag_inv(&coarse_matrix);
339
340            // Update level with P and R
341            if let Some(last) = levels.last_mut() {
342                last.prolongation = Some(prolongation);
343                last.restriction = Some(restriction);
344                last.coarse_to_fine = coarse_to_fine;
345            }
346
347            // Add coarse level
348            levels.push(AmgLevel {
349                matrix: coarse_matrix.clone(),
350                prolongation: None,
351                restriction: None,
352                diag_inv: coarse_diag_inv,
353                coarse_to_fine: Vec::new(),
354                num_dofs: num_coarse,
355            });
356
357            current_matrix = coarse_matrix;
358        }
359
360        let setup_time_ms = start.elapsed().as_secs_f64() * 1000.0;
361
362        // Compute complexities
363        let (grid_complexity, operator_complexity) = Self::compute_complexities(&levels);
364
365        Self {
366            levels,
367            config,
368            setup_time_ms,
369            grid_complexity,
370            operator_complexity,
371        }
372    }
373
374    /// Get number of levels in hierarchy
375    pub fn num_levels(&self) -> usize {
376        self.levels.len()
377    }
378
379    /// Get setup time in milliseconds
380    pub fn setup_time_ms(&self) -> f64 {
381        self.setup_time_ms
382    }
383
384    /// Get grid complexity (sum of DOFs / fine DOFs)
385    pub fn grid_complexity(&self) -> f64 {
386        self.grid_complexity
387    }
388
389    /// Get operator complexity (sum of nnz / fine nnz)
390    pub fn operator_complexity(&self) -> f64 {
391        self.operator_complexity
392    }
393
394    /// Get configuration
395    pub fn config(&self) -> &AmgConfig {
396        &self.config
397    }
398
399    /// Compute inverse diagonal for Jacobi smoothing
400    fn compute_diag_inv(matrix: &CsrMatrix<T>) -> Array1<T> {
401        let n = matrix.num_rows;
402        let mut diag_inv = Array1::from_elem(n, T::one());
403
404        for i in 0..n {
405            let diag = matrix.get(i, i);
406            let tol = T::Real::from_f64(1e-15).unwrap();
407            if diag.norm() > tol {
408                diag_inv[i] = diag.inv();
409            }
410        }
411
412        diag_inv
413    }
414
415    /// Compute strength of connection matrix
416    ///
417    /// Entry (i,j) is strong if |a_ij| >= θ * max_k!=i |a_ik|
418    fn compute_strength_matrix(matrix: &CsrMatrix<T>, theta: f64) -> Vec<Vec<usize>> {
419        let n = matrix.num_rows;
420
421        #[cfg(any(feature = "native", feature = "wasm"))]
422        {
423            parallel_map_indexed(n, |i| {
424                // Find max off-diagonal magnitude in row i
425                let mut max_off_diag = T::Real::from_f64(0.0).unwrap();
426                for (j, val) in matrix.row_entries(i) {
427                    if i != j {
428                        let norm = val.norm();
429                        if norm > max_off_diag {
430                            max_off_diag = norm;
431                        }
432                    }
433                }
434
435                let threshold = T::Real::from_f64(theta).unwrap() * max_off_diag;
436
437                // Collect strong connections
438                let mut row_strong = Vec::new();
439                for (j, val) in matrix.row_entries(i) {
440                    if i != j && val.norm() >= threshold {
441                        row_strong.push(j);
442                    }
443                }
444                row_strong
445            })
446        }
447
448        #[cfg(not(any(feature = "native", feature = "wasm")))]
449        {
450            let mut strong: Vec<Vec<usize>> = vec![Vec::new(); n];
451            for (i, row_strong) in strong.iter_mut().enumerate().take(n) {
452                // Find max off-diagonal magnitude in row i
453                let mut max_off_diag = T::Real::from_f64(0.0).unwrap();
454                for (j, val) in matrix.row_entries(i) {
455                    if i != j {
456                        let norm = val.norm();
457                        if norm > max_off_diag {
458                            max_off_diag = norm;
459                        }
460                    }
461                }
462
463                let threshold = T::Real::from_f64(theta).unwrap() * max_off_diag;
464
465                // Collect strong connections
466                for (j, val) in matrix.row_entries(i) {
467                    if i != j && val.norm() >= threshold {
468                        row_strong.push(j);
469                    }
470                }
471            }
472            strong
473        }
474    }
475
476    /// Classical Ruge-Stüben coarsening
477    fn coarsen_ruge_stuben(
478        matrix: &CsrMatrix<T>,
479        strong: &[Vec<usize>],
480    ) -> (Vec<PointType>, Vec<usize>) {
481        let n = matrix.num_rows;
482        let mut point_types = vec![PointType::Undecided; n];
483
484        // Compute influence measure λ_i = |S_i^T| (how many points strongly depend on i)
485        let mut lambda: Vec<usize> = vec![0; n];
486        for row in strong.iter().take(n) {
487            for &j in row {
488                lambda[j] += 1;
489            }
490        }
491
492        // Build priority queue (we use a simple approach: process by decreasing lambda)
493        let mut order: Vec<usize> = (0..n).collect();
494        order.sort_by(|&a, &b| lambda[b].cmp(&lambda[a]));
495
496        // First pass: select C-points
497        for &i in &order {
498            if point_types[i] != PointType::Undecided {
499                continue;
500            }
501
502            // Make i a C-point
503            point_types[i] = PointType::Coarse;
504
505            // All points that strongly depend on i become F-points
506            for j in 0..n {
507                if point_types[j] == PointType::Undecided && strong[j].contains(&i) {
508                    point_types[j] = PointType::Fine;
509                    // Update lambda for neighbors
510                    for &k in &strong[j] {
511                        if point_types[k] == PointType::Undecided {
512                            lambda[k] = lambda[k].saturating_sub(1);
513                        }
514                    }
515                }
516            }
517        }
518
519        // Ensure all remaining undecided become fine
520        for pt in &mut point_types {
521            if *pt == PointType::Undecided {
522                *pt = PointType::Fine;
523            }
524        }
525
526        // Build coarse-to-fine mapping
527        let coarse_to_fine: Vec<usize> = (0..n)
528            .filter(|&i| point_types[i] == PointType::Coarse)
529            .collect();
530
531        (point_types, coarse_to_fine)
532    }
533
534    /// Parallel Modified Independent Set (PMIS) coarsening
535    fn coarsen_pmis(matrix: &CsrMatrix<T>, strong: &[Vec<usize>]) -> (Vec<PointType>, Vec<usize>) {
536        let n = matrix.num_rows;
537        let mut point_types = vec![PointType::Undecided; n];
538
539        // Compute weights based on number of strong connections
540        let weights: Vec<f64> = (0..n)
541            .map(|i| {
542                // Weight = |S_i| + random tie-breaker
543                strong[i].len() as f64 + (i as f64 * 0.0001) % 0.001
544            })
545            .collect();
546
547        // Iterative independent set selection
548        let mut changed = true;
549        let mut iteration = 0;
550        const MAX_ITERATIONS: usize = 100;
551
552        while changed && iteration < MAX_ITERATIONS {
553            changed = false;
554            iteration += 1;
555
556            #[cfg(any(feature = "native", feature = "wasm"))]
557            {
558                // Parallel pass: determine new C-points and F-points
559                let updates: Vec<(usize, PointType)> =
560                    parallel_enumerate_map(&point_types, |i, pt| {
561                        if *pt != PointType::Undecided {
562                            return (i, *pt);
563                        }
564
565                        // Check if i has maximum weight among undecided strong neighbors
566                        let mut is_max = true;
567                        for &j in &strong[i] {
568                            if point_types[j] == PointType::Undecided && weights[j] > weights[i] {
569                                is_max = false;
570                                break;
571                            }
572                        }
573
574                        // Check if any strong neighbor is already C
575                        let has_c_neighbor = strong[i]
576                            .iter()
577                            .any(|&j| point_types[j] == PointType::Coarse);
578
579                        if has_c_neighbor {
580                            (i, PointType::Fine)
581                        } else if is_max {
582                            (i, PointType::Coarse)
583                        } else {
584                            (i, PointType::Undecided)
585                        }
586                    });
587
588                for (i, new_type) in updates {
589                    if point_types[i] != new_type {
590                        point_types[i] = new_type;
591                        changed = true;
592                    }
593                }
594            }
595
596            #[cfg(not(any(feature = "native", feature = "wasm")))]
597            {
598                // Sequential fallback
599                let old_types = point_types.clone();
600                for i in 0..n {
601                    if old_types[i] != PointType::Undecided {
602                        continue;
603                    }
604
605                    // Check if i has maximum weight among undecided strong neighbors
606                    let mut is_max = true;
607                    for &j in &strong[i] {
608                        if old_types[j] == PointType::Undecided && weights[j] > weights[i] {
609                            is_max = false;
610                            break;
611                        }
612                    }
613
614                    // Check if any strong neighbor is already C
615                    let has_c_neighbor =
616                        strong[i].iter().any(|&j| old_types[j] == PointType::Coarse);
617
618                    if has_c_neighbor {
619                        point_types[i] = PointType::Fine;
620                        changed = true;
621                    } else if is_max {
622                        point_types[i] = PointType::Coarse;
623                        changed = true;
624                    }
625                }
626            }
627        }
628
629        // Any remaining undecided become coarse
630        for pt in &mut point_types {
631            if *pt == PointType::Undecided {
632                *pt = PointType::Coarse;
633            }
634        }
635
636        // Build coarse-to-fine mapping
637        let coarse_to_fine: Vec<usize> = (0..n)
638            .filter(|&i| point_types[i] == PointType::Coarse)
639            .collect();
640
641        (point_types, coarse_to_fine)
642    }
643
644    /// Build interpolation operator P
645    fn build_interpolation(
646        matrix: &CsrMatrix<T>,
647        strong: &[Vec<usize>],
648        point_types: &[PointType],
649        coarse_to_fine: &[usize],
650        config: &AmgConfig,
651    ) -> CsrMatrix<T> {
652        let n_fine = matrix.num_rows;
653        let n_coarse = coarse_to_fine.len();
654
655        // Build fine-to-coarse mapping
656        let mut fine_to_coarse = vec![usize::MAX; n_fine];
657        for (coarse_idx, &fine_idx) in coarse_to_fine.iter().enumerate() {
658            fine_to_coarse[fine_idx] = coarse_idx;
659        }
660
661        // Build P row by row
662        let mut triplets: Vec<(usize, usize, T)> = Vec::new();
663
664        for i in 0..n_fine {
665            match point_types[i] {
666                PointType::Coarse => {
667                    // C-point: identity mapping P_ij = 1 if j = coarse_index(i)
668                    let coarse_idx = fine_to_coarse[i];
669                    triplets.push((i, coarse_idx, T::one()));
670                }
671                PointType::Fine => {
672                    // F-point: interpolate from strong C-neighbors
673                    let a_ii = matrix.get(i, i);
674
675                    // Collect strong C-neighbors
676                    let c_neighbors: Vec<usize> = strong[i]
677                        .iter()
678                        .copied()
679                        .filter(|&j| point_types[j] == PointType::Coarse)
680                        .collect();
681
682                    if c_neighbors.is_empty() {
683                        continue;
684                    }
685
686                    // Standard interpolation weights
687                    match config.interpolation {
688                        AmgInterpolation::Direct | AmgInterpolation::Standard => {
689                            let mut weights: Vec<(usize, T)> = Vec::new();
690                            let mut sum_weights = T::zero();
691
692                            for &j in &c_neighbors {
693                                let a_ij = matrix.get(i, j);
694                                let tol = T::Real::from_f64(1e-15).unwrap();
695                                if a_ii.norm() > tol {
696                                    let w = T::zero() - a_ij * a_ii.inv();
697                                    weights.push((fine_to_coarse[j], w));
698                                    sum_weights += w;
699                                }
700                            }
701
702                            // Add weak connections contribution (standard interpolation)
703                            if config.interpolation == AmgInterpolation::Standard {
704                                let mut weak_sum = T::zero();
705                                for (j, val) in matrix.row_entries(i) {
706                                    if j != i && !c_neighbors.contains(&j) {
707                                        weak_sum += val;
708                                    }
709                                }
710
711                                let tol = T::Real::from_f64(1e-15).unwrap();
712                                if sum_weights.norm() > tol && weak_sum.norm() > tol {
713                                    let scale = T::one() + weak_sum * (a_ii * sum_weights).inv();
714                                    for (_, w) in &mut weights {
715                                        *w *= scale;
716                                    }
717                                }
718                            }
719
720                            // Truncate small weights if configured
721                            if config.trunc_factor > 0.0 {
722                                let max_w = weights.iter().map(|(_, w)| w.norm()).fold(
723                                    T::Real::from_f64(0.0).unwrap(),
724                                    |a, b| {
725                                        if a > b { a } else { b }
726                                    },
727                                );
728                                let threshold =
729                                    T::Real::from_f64(config.trunc_factor).unwrap() * max_w;
730                                weights.retain(|(_, w)| w.norm() >= threshold);
731
732                                if weights.len() > config.max_interp_elements {
733                                    weights.sort_by(|a, b| {
734                                        b.1.norm().partial_cmp(&a.1.norm()).unwrap()
735                                    });
736                                    weights.truncate(config.max_interp_elements);
737                                }
738                            }
739
740                            for (coarse_idx, w) in weights {
741                                triplets.push((i, coarse_idx, w));
742                            }
743                        }
744                        AmgInterpolation::Extended => {
745                            let mut weights: Vec<(usize, T)> = Vec::new();
746
747                            // Direct C-neighbors
748                            for &j in &c_neighbors {
749                                let a_ij = matrix.get(i, j);
750                                let tol = T::Real::from_f64(1e-15).unwrap();
751                                if a_ii.norm() > tol {
752                                    let w = T::zero() - a_ij * a_ii.inv();
753                                    weights.push((fine_to_coarse[j], w));
754                                }
755                            }
756
757                            // F-neighbors contribute through their C-neighbors
758                            let f_neighbors: Vec<usize> = strong[i]
759                                .iter()
760                                .copied()
761                                .filter(|&j| point_types[j] == PointType::Fine)
762                                .collect();
763
764                            for &k in &f_neighbors {
765                                let a_ik = matrix.get(i, k);
766                                let a_kk = matrix.get(k, k);
767
768                                let tol = T::Real::from_f64(1e-15).unwrap();
769                                if a_kk.norm() < tol {
770                                    continue;
771                                }
772
773                                for &j in &strong[k] {
774                                    if point_types[j] == PointType::Coarse {
775                                        let a_kj = matrix.get(k, j);
776                                        let w = T::zero() - a_ik * a_kj * (a_ii * a_kk).inv();
777
778                                        let coarse_j = fine_to_coarse[j];
779                                        if let Some((_, existing)) =
780                                            weights.iter_mut().find(|(idx, _)| *idx == coarse_j)
781                                        {
782                                            *existing += w;
783                                        } else {
784                                            weights.push((coarse_j, w));
785                                        }
786                                    }
787                                }
788                            }
789
790                            if weights.len() > config.max_interp_elements {
791                                weights
792                                    .sort_by(|a, b| b.1.norm().partial_cmp(&a.1.norm()).unwrap());
793                                weights.truncate(config.max_interp_elements);
794                            }
795
796                            for (coarse_idx, w) in weights {
797                                triplets.push((i, coarse_idx, w));
798                            }
799                        }
800                    }
801                }
802                PointType::Undecided => {}
803            }
804        }
805
806        CsrMatrix::from_triplets(n_fine, n_coarse, triplets)
807    }
808
809    /// Transpose a CSR matrix
810    fn transpose_csr(matrix: &CsrMatrix<T>) -> CsrMatrix<T> {
811        let m = matrix.num_rows;
812        let n = matrix.num_cols;
813
814        let mut triplets: Vec<(usize, usize, T)> = Vec::new();
815        for i in 0..m {
816            for (j, val) in matrix.row_entries(i) {
817                triplets.push((j, i, val));
818            }
819        }
820
821        CsrMatrix::from_triplets(n, m, triplets)
822    }
823
824    /// Compute Galerkin coarse grid operator: A_c = R * A * P
825    fn galerkin_product(r: &CsrMatrix<T>, a: &CsrMatrix<T>, p: &CsrMatrix<T>) -> CsrMatrix<T> {
826        let ap = a.matmul(p);
827        r.matmul(&ap)
828    }
829
830    /// Sparse matrix multiplication - now uses optimized CSR matmul
831    #[allow(dead_code)]
832    fn sparse_matmul(a: &CsrMatrix<T>, b: &CsrMatrix<T>) -> CsrMatrix<T> {
833        a.matmul(b)
834    }
835
836    /// Compute grid and operator complexities
837    fn compute_complexities(levels: &[AmgLevel<T>]) -> (f64, f64) {
838        if levels.is_empty() {
839            return (1.0, 1.0);
840        }
841
842        let fine_dofs = levels[0].num_dofs as f64;
843        let fine_nnz = levels[0].matrix.nnz() as f64;
844
845        let total_dofs: f64 = levels.iter().map(|l| l.num_dofs as f64).sum();
846        let total_nnz: f64 = levels.iter().map(|l| l.matrix.nnz() as f64).sum();
847
848        let grid_complexity = total_dofs / fine_dofs;
849        let operator_complexity = total_nnz / fine_nnz;
850
851        (grid_complexity, operator_complexity)
852    }
853
854    /// Apply Jacobi smoothing: x = x + ω * D^{-1} * (b - A*x)
855    fn smooth_jacobi(
856        matrix: &CsrMatrix<T>,
857        diag_inv: &Array1<T>,
858        x: &mut Array1<T>,
859        b: &Array1<T>,
860        omega: f64,
861        num_sweeps: usize,
862    ) {
863        let omega = T::from_real(T::Real::from_f64(omega).unwrap());
864        let n = x.len();
865
866        for _ in 0..num_sweeps {
867            let r = b - &matrix.matvec(x);
868
869            #[cfg(any(feature = "native", feature = "wasm"))]
870            {
871                let updates: Vec<T> = parallel_map_indexed(n, |i| omega * diag_inv[i] * r[i]);
872                for (i, delta) in updates.into_iter().enumerate() {
873                    x[i] += delta;
874                }
875            }
876
877            #[cfg(not(any(feature = "native", feature = "wasm")))]
878            {
879                for i in 0..n {
880                    x[i] += omega * diag_inv[i] * r[i];
881                }
882            }
883        }
884    }
885
886    /// Apply l1-Jacobi smoothing
887    fn smooth_l1_jacobi(
888        matrix: &CsrMatrix<T>,
889        x: &mut Array1<T>,
890        b: &Array1<T>,
891        num_sweeps: usize,
892    ) {
893        let n = x.len();
894
895        let l1_diag: Vec<T::Real> = (0..n)
896            .map(|i| {
897                let mut sum = T::Real::from_f64(0.0).unwrap();
898                for (_, val) in matrix.row_entries(i) {
899                    sum += val.norm();
900                }
901                let tol = T::Real::from_f64(1e-15).unwrap();
902                if sum > tol {
903                    sum
904                } else {
905                    T::Real::from_f64(1.0).unwrap()
906                }
907            })
908            .collect();
909
910        for _ in 0..num_sweeps {
911            let r = b - &matrix.matvec(x);
912
913            #[cfg(any(feature = "native", feature = "wasm"))]
914            {
915                let updates: Vec<T> =
916                    parallel_map_indexed(n, |i| r[i] * T::from_real(l1_diag[i]).inv());
917                for (i, delta) in updates.into_iter().enumerate() {
918                    x[i] += delta;
919                }
920            }
921
922            #[cfg(not(any(feature = "native", feature = "wasm")))]
923            {
924                for i in 0..n {
925                    x[i] += r[i] * T::from_real(l1_diag[i]).inv();
926                }
927            }
928        }
929    }
930
931    /// Apply symmetric Gauss-Seidel smoothing
932    fn smooth_sym_gauss_seidel(
933        matrix: &CsrMatrix<T>,
934        x: &mut Array1<T>,
935        b: &Array1<T>,
936        num_sweeps: usize,
937    ) {
938        let n = x.len();
939        let tol = T::Real::from_f64(1e-15).unwrap();
940
941        for _ in 0..num_sweeps {
942            // Forward sweep
943            for i in 0..n {
944                let mut sum = b[i];
945                let mut diag = T::one();
946
947                for (j, val) in matrix.row_entries(i) {
948                    if j == i {
949                        diag = val;
950                    } else {
951                        sum -= val * x[j];
952                    }
953                }
954
955                if diag.norm() > tol {
956                    x[i] = sum * diag.inv();
957                }
958            }
959
960            // Backward sweep
961            for i in (0..n).rev() {
962                let mut sum = b[i];
963                let mut diag = T::one();
964
965                for (j, val) in matrix.row_entries(i) {
966                    if j == i {
967                        diag = val;
968                    } else {
969                        sum -= val * x[j];
970                    }
971                }
972
973                if diag.norm() > tol {
974                    x[i] = sum * diag.inv();
975                }
976            }
977        }
978    }
979
980    /// Apply V-cycle
981    fn v_cycle(&self, level: usize, x: &mut Array1<T>, b: &Array1<T>) {
982        let lvl = &self.levels[level];
983
984        // Coarsest level: direct solve (or many smoothing iterations)
985        if level == self.levels.len() - 1 || lvl.prolongation.is_none() {
986            match self.config.smoother {
987                AmgSmoother::Jacobi | AmgSmoother::Chebyshev => {
988                    Self::smooth_jacobi(
989                        &lvl.matrix,
990                        &lvl.diag_inv,
991                        x,
992                        b,
993                        self.config.jacobi_weight,
994                        20,
995                    );
996                }
997                AmgSmoother::L1Jacobi => {
998                    Self::smooth_l1_jacobi(&lvl.matrix, x, b, 20);
999                }
1000                AmgSmoother::SymmetricGaussSeidel => {
1001                    Self::smooth_sym_gauss_seidel(&lvl.matrix, x, b, 10);
1002                }
1003            }
1004            return;
1005        }
1006
1007        // Pre-smoothing
1008        match self.config.smoother {
1009            AmgSmoother::Jacobi | AmgSmoother::Chebyshev => {
1010                Self::smooth_jacobi(
1011                    &lvl.matrix,
1012                    &lvl.diag_inv,
1013                    x,
1014                    b,
1015                    self.config.jacobi_weight,
1016                    self.config.num_pre_smooth,
1017                );
1018            }
1019            AmgSmoother::L1Jacobi => {
1020                Self::smooth_l1_jacobi(&lvl.matrix, x, b, self.config.num_pre_smooth);
1021            }
1022            AmgSmoother::SymmetricGaussSeidel => {
1023                Self::smooth_sym_gauss_seidel(&lvl.matrix, x, b, self.config.num_pre_smooth);
1024            }
1025        }
1026
1027        // Compute residual: r = b - A*x
1028        let r = b - &lvl.matrix.matvec(x);
1029
1030        // Restrict residual to coarse grid: r_c = R * r
1031        let r_coarse = lvl.restriction.as_ref().unwrap().matvec(&r);
1032
1033        // Initialize coarse correction
1034        let n_coarse = self.levels[level + 1].num_dofs;
1035        let mut e_coarse = Array1::from_elem(n_coarse, T::zero());
1036
1037        // Recursive call
1038        self.v_cycle(level + 1, &mut e_coarse, &r_coarse);
1039
1040        // Prolongate correction: e = P * e_c
1041        let e = lvl.prolongation.as_ref().unwrap().matvec(&e_coarse);
1042
1043        // Apply correction: x = x + e
1044        *x = x.clone() + e;
1045
1046        // Post-smoothing
1047        match self.config.smoother {
1048            AmgSmoother::Jacobi | AmgSmoother::Chebyshev => {
1049                Self::smooth_jacobi(
1050                    &lvl.matrix,
1051                    &lvl.diag_inv,
1052                    x,
1053                    b,
1054                    self.config.jacobi_weight,
1055                    self.config.num_post_smooth,
1056                );
1057            }
1058            AmgSmoother::L1Jacobi => {
1059                Self::smooth_l1_jacobi(&lvl.matrix, x, b, self.config.num_post_smooth);
1060            }
1061            AmgSmoother::SymmetricGaussSeidel => {
1062                Self::smooth_sym_gauss_seidel(&lvl.matrix, x, b, self.config.num_post_smooth);
1063            }
1064        }
1065    }
1066}
1067
1068impl<T: ComplexField> Preconditioner<T> for AmgPreconditioner<T>
1069where
1070    T::Real: Sync + Send,
1071{
1072    fn apply(&self, r: &Array1<T>) -> Array1<T> {
1073        if self.levels.is_empty() {
1074            return r.clone();
1075        }
1076
1077        let n = self.levels[0].num_dofs;
1078        if r.len() != n {
1079            return r.clone();
1080        }
1081
1082        let mut z = Array1::from_elem(n, T::zero());
1083
1084        match self.config.cycle {
1085            AmgCycle::VCycle => {
1086                self.v_cycle(0, &mut z, r);
1087            }
1088            AmgCycle::WCycle => {
1089                self.v_cycle(0, &mut z, r);
1090                self.v_cycle(0, &mut z, r);
1091            }
1092            AmgCycle::FCycle => {
1093                self.v_cycle(0, &mut z, r);
1094                let residual = r - &self.levels[0].matrix.matvec(&z);
1095                let mut correction = Array1::from_elem(n, T::zero());
1096                self.v_cycle(0, &mut correction, &residual);
1097                z = z + correction;
1098            }
1099        }
1100
1101        z
1102    }
1103}
1104
1105/// Diagnostic information about AMG setup
1106#[derive(Debug, Clone)]
1107pub struct AmgDiagnostics {
1108    /// Number of levels
1109    pub num_levels: usize,
1110    /// Grid complexity
1111    pub grid_complexity: f64,
1112    /// Operator complexity
1113    pub operator_complexity: f64,
1114    /// Setup time in milliseconds
1115    pub setup_time_ms: f64,
1116    /// DOFs per level
1117    pub level_dofs: Vec<usize>,
1118    /// NNZ per level
1119    pub level_nnz: Vec<usize>,
1120}
1121
1122impl<T: ComplexField> AmgPreconditioner<T> {
1123    /// Get diagnostic information
1124    pub fn diagnostics(&self) -> AmgDiagnostics {
1125        AmgDiagnostics {
1126            num_levels: self.levels.len(),
1127            grid_complexity: self.grid_complexity,
1128            operator_complexity: self.operator_complexity,
1129            setup_time_ms: self.setup_time_ms,
1130            level_dofs: self.levels.iter().map(|l| l.num_dofs).collect(),
1131            level_nnz: self.levels.iter().map(|l| l.matrix.nnz()).collect(),
1132        }
1133    }
1134}
1135
1136#[cfg(test)]
1137mod tests {
1138    use super::*;
1139    use num_complex::Complex64;
1140
1141    /// Create a simple 1D Laplacian matrix for testing
1142    fn create_1d_laplacian(n: usize) -> CsrMatrix<Complex64> {
1143        let mut triplets: Vec<(usize, usize, Complex64)> = Vec::new();
1144
1145        for i in 0..n {
1146            triplets.push((i, i, Complex64::new(2.0, 0.0)));
1147            if i > 0 {
1148                triplets.push((i, i - 1, Complex64::new(-1.0, 0.0)));
1149            }
1150            if i < n - 1 {
1151                triplets.push((i, i + 1, Complex64::new(-1.0, 0.0)));
1152            }
1153        }
1154
1155        CsrMatrix::from_triplets(n, n, triplets)
1156    }
1157
1158    #[test]
1159    fn test_amg_creation() {
1160        let matrix = create_1d_laplacian(100);
1161        let config = AmgConfig::default();
1162
1163        let amg = AmgPreconditioner::from_csr(&matrix, config);
1164
1165        assert!(amg.num_levels() >= 2);
1166        assert!(amg.grid_complexity() >= 1.0);
1167        assert!(amg.operator_complexity() >= 1.0);
1168    }
1169
1170    #[test]
1171    fn test_amg_apply() {
1172        let matrix = create_1d_laplacian(50);
1173        let config = AmgConfig::default();
1174        let amg = AmgPreconditioner::from_csr(&matrix, config);
1175
1176        let r = Array1::from_vec((0..50).map(|i| Complex64::new(i as f64, 0.0)).collect());
1177
1178        let z = amg.apply(&r);
1179
1180        assert_eq!(z.len(), r.len());
1181
1182        let diff: f64 = (&z - &r).iter().map(|x| x.norm()).sum();
1183        assert!(diff > 1e-10, "Preconditioner should modify the vector");
1184    }
1185
1186    #[test]
1187    fn test_amg_pmis_coarsening() {
1188        let matrix = create_1d_laplacian(100);
1189        let config = AmgConfig {
1190            coarsening: AmgCoarsening::Pmis,
1191            ..Default::default()
1192        };
1193
1194        let amg = AmgPreconditioner::from_csr(&matrix, config);
1195        assert!(amg.num_levels() >= 2);
1196    }
1197
1198    #[test]
1199    fn test_amg_different_smoothers() {
1200        let matrix = create_1d_laplacian(50);
1201        let r = Array1::from_vec((0..50).map(|i| Complex64::new(i as f64, 0.0)).collect());
1202
1203        for smoother in [
1204            AmgSmoother::Jacobi,
1205            AmgSmoother::L1Jacobi,
1206            AmgSmoother::SymmetricGaussSeidel,
1207        ] {
1208            let config = AmgConfig {
1209                smoother,
1210                ..Default::default()
1211            };
1212            let amg = AmgPreconditioner::from_csr(&matrix, config);
1213
1214            let z = amg.apply(&r);
1215            assert_eq!(z.len(), r.len());
1216        }
1217    }
1218
1219    #[test]
1220    fn test_amg_reduces_residual() {
1221        let n = 64;
1222        let matrix = create_1d_laplacian(n);
1223        let config = AmgConfig::default();
1224        let amg = AmgPreconditioner::from_csr(&matrix, config);
1225
1226        let b = Array1::from_vec(
1227            (0..n)
1228                .map(|i| Complex64::new((i as f64).sin(), 0.0))
1229                .collect(),
1230        );
1231
1232        let mut x = Array1::from_elem(n, Complex64::new(0.0, 0.0));
1233
1234        let r0 = &b - &matrix.matvec(&x);
1235        let norm_r0: f64 = r0.iter().map(|v| v.norm_sqr()).sum::<f64>().sqrt();
1236
1237        for _ in 0..10 {
1238            let r = &b - &matrix.matvec(&x);
1239            let z = amg.apply(&r);
1240            x = x + z;
1241        }
1242
1243        let rf = &b - &matrix.matvec(&x);
1244        let norm_rf: f64 = rf.iter().map(|v| v.norm_sqr()).sum::<f64>().sqrt();
1245
1246        assert!(
1247            norm_rf < norm_r0 * 0.1,
1248            "AMG should significantly reduce residual: {} -> {}",
1249            norm_r0,
1250            norm_rf
1251        );
1252    }
1253
1254    #[test]
1255    fn test_diagnostics() {
1256        let matrix = create_1d_laplacian(100);
1257        let amg = AmgPreconditioner::from_csr(&matrix, AmgConfig::default());
1258
1259        let diag = amg.diagnostics();
1260
1261        assert!(diag.num_levels >= 2);
1262        assert_eq!(diag.level_dofs.len(), diag.num_levels);
1263        assert_eq!(diag.level_nnz.len(), diag.num_levels);
1264        assert!(diag.grid_complexity >= 1.0);
1265        assert!(diag.setup_time_ms >= 0.0);
1266    }
1267}