math_audio_bem/core/assembly/
slfmm.rs

1//! Single-Level Fast Multipole Method (SLFMM) assembly
2//!
3//! Direct port of NC_BuildSystemFMBEM (SLFMM mode) from NC_EquationSystem.cpp.
4//!
5//! The SLFMM decomposes the BEM interaction matrix as:
6//! ```text
7//! [A] = [N] + [S][D][T]
8//! ```
9//! where:
10//! - [N] is the near-field matrix (direct interactions between nearby elements)
11//! - [T] is the T-matrix (element-to-cluster multipole expansion)
12//! - [D] is the D-matrix (cluster-to-cluster translation)
13//! - [S] is the S-matrix (cluster-to-element local expansion)
14//!
15//! **Note**: This module requires either the `native` or `wasm` feature for parallel processing.
16//! Parallelism is provided via rayon (native) or wasm-bindgen-rayon (WASM).
17
18#![cfg(any(feature = "native", feature = "wasm"))]
19
20use ndarray::{Array1, Array2};
21use num_complex::Complex64;
22
23use crate::core::parallel::{parallel_enumerate_filter_map, parallel_flat_map, parallel_map};
24
25use crate::core::integration::{regular_integration, singular_integration, unit_sphere_quadrature};
26use crate::core::types::{BoundaryCondition, Cluster, Element, PhysicsParams};
27use math_audio_wave::special::spherical_hankel_first_kind;
28
29/// Result of SLFMM assembly
30#[derive(Clone)]
31pub struct SlfmmSystem {
32    /// Near-field coefficient matrix (sparse, stored as dense blocks)
33    pub near_matrix: Vec<NearFieldBlock>,
34    /// T-matrix for each cluster (element DOFs → multipole expansion)
35    pub t_matrices: Vec<Array2<Complex64>>,
36    /// T-vector for RHS contribution
37    pub t_vector: Array1<Complex64>,
38    /// D-matrix entries for far cluster pairs
39    pub d_matrices: Vec<DMatrixEntry>,
40    /// S-matrix for each cluster (multipole expansion → field DOFs)
41    pub s_matrices: Vec<Array2<Complex64>>,
42    /// RHS vector
43    pub rhs: Array1<Complex64>,
44    /// Number of DOFs
45    pub num_dofs: usize,
46    /// Number of integration points on unit sphere
47    pub num_sphere_points: usize,
48    /// Number of expansion terms
49    pub num_expansion_terms: usize,
50    /// Number of clusters
51    pub num_clusters: usize,
52    /// Cluster DOF mappings: for each cluster, the global DOF indices
53    pub cluster_dof_indices: Vec<Vec<usize>>,
54}
55
56/// Near-field block between two clusters
57#[derive(Debug, Clone)]
58pub struct NearFieldBlock {
59    /// Source cluster index
60    pub source_cluster: usize,
61    /// Field cluster index
62    pub field_cluster: usize,
63    /// Dense coefficient matrix for this block
64    pub coefficients: Array2<Complex64>,
65}
66
67/// D-matrix entry for a far cluster pair
68///
69/// The D-matrix for SLFMM is diagonal (same multipole index in source and field),
70/// so we store only the diagonal entries to save memory.
71/// This reduces storage from O(n²) to O(n) per cluster pair.
72#[derive(Debug, Clone)]
73pub struct DMatrixEntry {
74    /// Source cluster index
75    pub source_cluster: usize,
76    /// Field cluster index
77    pub field_cluster: usize,
78    /// Diagonal translation coefficients (length = num_sphere_points)
79    /// The full D-matrix would be: D[p,q] = diagonal[p] if p == q, else 0
80    pub diagonal: Array1<Complex64>,
81}
82
83impl SlfmmSystem {
84    /// Create a new empty SLFMM system
85    pub fn new(
86        num_dofs: usize,
87        num_clusters: usize,
88        num_sphere_points: usize,
89        num_expansion_terms: usize,
90    ) -> Self {
91        Self {
92            near_matrix: Vec::new(),
93            t_matrices: Vec::with_capacity(num_clusters),
94            t_vector: Array1::zeros(num_sphere_points * num_clusters),
95            d_matrices: Vec::new(),
96            s_matrices: Vec::with_capacity(num_clusters),
97            rhs: Array1::zeros(num_dofs),
98            num_dofs,
99            num_sphere_points,
100            num_expansion_terms,
101            num_clusters,
102            cluster_dof_indices: Vec::with_capacity(num_clusters),
103        }
104    }
105
106    /// Extract the near-field matrix as a dense matrix for preconditioning
107    ///
108    /// This assembles only the near-field blocks into a dense matrix,
109    /// which is much smaller than the full matrix (O(N) vs O(N²) entries).
110    pub fn extract_near_field_matrix(&self) -> Array2<Complex64> {
111        let mut matrix = Array2::zeros((self.num_dofs, self.num_dofs));
112
113        for block in &self.near_matrix {
114            let src_dofs = &self.cluster_dof_indices[block.source_cluster];
115            let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
116
117            // Copy block to global matrix
118            for (local_i, &global_i) in src_dofs.iter().enumerate() {
119                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
120                    matrix[[global_i, global_j]] += block.coefficients[[local_i, local_j]];
121                }
122            }
123
124            // Handle symmetric storage: also fill the (fld, src) part
125            if block.source_cluster != block.field_cluster {
126                for (local_i, &global_i) in src_dofs.iter().enumerate() {
127                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
128                        matrix[[global_j, global_i]] += block.coefficients[[local_i, local_j]];
129                    }
130                }
131            }
132        }
133
134        matrix
135    }
136
137    /// Apply the SLFMM operator: y = ([N] + [S][D][T]) * x
138    ///
139    /// This is used in iterative solvers (CGS, BiCGSTAB).
140    ///
141    /// The decomposition is:
142    /// - Near-field: Direct element-to-element interactions for nearby clusters
143    /// - Far-field: Multipole expansions with S*D*T factorization
144    ///
145    /// # Arguments
146    /// * `x` - Input vector (length = num_dofs)
147    ///
148    /// # Returns
149    /// * `y` - Output vector (length = num_dofs), y = A*x
150    pub fn matvec(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
151        // === Near-field contribution: y += [N] * x (parallelized) ===
152        // Compute all near-field block contributions in parallel, then sum them
153        let near_contributions: Vec<Vec<(usize, Complex64)>> =
154            parallel_flat_map(&self.near_matrix, |block| {
155                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
156                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
157
158                let mut contributions = Vec::new();
159
160                // Gather x values from field cluster
161                let x_local: Array1<Complex64> = Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
162
163                // Apply block matrix
164                let y_local = block.coefficients.dot(&x_local);
165
166                // Collect contributions for source DOFs
167                for (local_i, &global_i) in src_dofs.iter().enumerate() {
168                    contributions.push((global_i, y_local[local_i]));
169                }
170
171                // Handle symmetric storage
172                if block.source_cluster != block.field_cluster {
173                    let x_src: Array1<Complex64> =
174                        Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
175                    let y_fld = block.coefficients.t().dot(&x_src);
176                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
177                        contributions.push((global_j, y_fld[local_j]));
178                    }
179                }
180
181                vec![contributions]
182            });
183
184        // Accumulate near-field contributions
185        let mut y = Array1::zeros(self.num_dofs);
186        for contributions in near_contributions {
187            for (idx, val) in contributions {
188                y[idx] += val;
189            }
190        }
191
192        // === Far-field contribution: y += [S][D][T] * x (parallelized) ===
193
194        // Step 1: Compute multipole expansions for each cluster in parallel: t[c] = T[c] * x[c]
195        let multipoles: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
196            &self.t_matrices,
197            |cluster_idx, t_mat| {
198                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
199                if cluster_dofs.is_empty() || t_mat.is_empty() {
200                    Array1::zeros(self.num_sphere_points)
201                } else {
202                    let x_local: Array1<Complex64> =
203                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
204                    t_mat.dot(&x_local)
205                }
206            },
207        );
208
209        // Step 2: Translate multipoles between far clusters (parallelized)
210        // Compute all D*multipole products in parallel
211        // D-matrix is diagonal, so D*x is just element-wise multiplication
212        let d_contributions: Vec<(usize, Array1<Complex64>)> =
213            parallel_map(&self.d_matrices, |d_entry| {
214                let src_mult = &multipoles[d_entry.source_cluster];
215                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * src_mult[i]
216                let translated = &d_entry.diagonal * src_mult;
217                (d_entry.field_cluster, translated)
218            });
219
220        // Accumulate D-matrix contributions into locals (sequential to avoid race)
221        let mut locals: Vec<Array1<Complex64>> = (0..self.num_clusters)
222            .map(|_| Array1::zeros(self.num_sphere_points))
223            .collect();
224
225        for (field_cluster, translated) in d_contributions {
226            for i in 0..self.num_sphere_points {
227                locals[field_cluster][i] += translated[i];
228            }
229        }
230
231        // Step 3: Evaluate locals at field points in parallel: y[c] += S[c] * locals[c]
232        let far_contributions: Vec<Vec<(usize, Complex64)>> =
233            parallel_enumerate_filter_map(&self.s_matrices, |cluster_idx, s_mat| {
234                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
235                if cluster_dofs.is_empty() || s_mat.is_empty() {
236                    return None;
237                }
238                let y_local = s_mat.dot(&locals[cluster_idx]);
239                let contributions: Vec<(usize, Complex64)> = cluster_dofs
240                    .iter()
241                    .enumerate()
242                    .map(|(local_j, &global_j)| (global_j, y_local[local_j]))
243                    .collect();
244                Some(contributions)
245            });
246
247        // Accumulate far-field contributions
248        for contributions in far_contributions {
249            for (idx, val) in contributions {
250                y[idx] += val;
251            }
252        }
253
254        y
255    }
256
257    /// Apply the SLFMM operator transpose: y = A^T * x (parallelized)
258    ///
259    /// Used by some iterative solvers (e.g., BiCGSTAB).
260    pub fn matvec_transpose(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
261        // === Near-field contribution (transposed, parallelized) ===
262        let near_contributions: Vec<Vec<(usize, Complex64)>> =
263            parallel_flat_map(&self.near_matrix, |block| {
264                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
265                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
266
267                let mut contributions = Vec::new();
268
269                // For transpose: gather from source DOFs, scatter to field DOFs
270                let x_local: Array1<Complex64> = Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
271                let y_local = block.coefficients.t().dot(&x_local);
272                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
273                    contributions.push((global_j, y_local[local_j]));
274                }
275
276                // Symmetric storage
277                if block.source_cluster != block.field_cluster {
278                    let x_fld: Array1<Complex64> =
279                        Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
280                    let y_src = block.coefficients.dot(&x_fld);
281                    for (local_i, &global_i) in src_dofs.iter().enumerate() {
282                        contributions.push((global_i, y_src[local_i]));
283                    }
284                }
285
286                vec![contributions]
287            });
288
289        // Accumulate near-field contributions
290        let mut y = Array1::zeros(self.num_dofs);
291        for contributions in near_contributions {
292            for (idx, val) in contributions {
293                y[idx] += val;
294            }
295        }
296
297        // === Far-field contribution (transposed, parallelized): y += T^T * D^T * S^T * x ===
298
299        // Step 1: S^T * x (parallelized)
300        let locals: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
301            &self.s_matrices,
302            |cluster_idx, s_mat| {
303                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
304                if cluster_dofs.is_empty() || s_mat.is_empty() {
305                    Array1::zeros(self.num_sphere_points)
306                } else {
307                    let x_local: Array1<Complex64> =
308                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
309                    s_mat.t().dot(&x_local)
310                }
311            },
312        );
313
314        // Step 2: D^T translation (parallelized)
315        // D-matrix is diagonal, so D^T = D (diagonal matrices are symmetric)
316        let d_contributions: Vec<(usize, Array1<Complex64>)> =
317            parallel_map(&self.d_matrices, |d_entry| {
318                let fld_local = &locals[d_entry.field_cluster];
319                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * fld_local[i]
320                let translated = &d_entry.diagonal * fld_local;
321                (d_entry.source_cluster, translated)
322            });
323
324        // Accumulate D-matrix contributions
325        let mut multipoles: Vec<Array1<Complex64>> = (0..self.num_clusters)
326            .map(|_| Array1::zeros(self.num_sphere_points))
327            .collect();
328
329        for (source_cluster, translated) in d_contributions {
330            for i in 0..self.num_sphere_points {
331                multipoles[source_cluster][i] += translated[i];
332            }
333        }
334
335        // Step 3: T^T * multipoles (parallelized)
336        let far_contributions: Vec<Vec<(usize, Complex64)>> =
337            parallel_enumerate_filter_map(&self.t_matrices, |cluster_idx, t_mat| {
338                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
339                if cluster_dofs.is_empty() || t_mat.is_empty() {
340                    return None;
341                }
342                let y_local = t_mat.t().dot(&multipoles[cluster_idx]);
343                let contributions: Vec<(usize, Complex64)> = cluster_dofs
344                    .iter()
345                    .enumerate()
346                    .map(|(local_i, &global_i)| (global_i, y_local[local_i]))
347                    .collect();
348                Some(contributions)
349            });
350
351        // Accumulate far-field contributions
352        for contributions in far_contributions {
353            for (idx, val) in contributions {
354                y[idx] += val;
355            }
356        }
357
358        y
359    }
360}
361
362use math_audio_solvers::traits::LinearOperator;
363
364impl LinearOperator<Complex64> for SlfmmSystem {
365    fn num_rows(&self) -> usize {
366        self.num_dofs
367    }
368
369    fn num_cols(&self) -> usize {
370        self.num_dofs
371    }
372
373    fn apply(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
374        self.matvec(x)
375    }
376
377    fn apply_transpose(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
378        self.matvec_transpose(x)
379    }
380}
381
382/// Build the SLFMM system matrices
383///
384/// # Arguments
385/// * `elements` - Vector of mesh elements
386/// * `nodes` - Node coordinates (num_nodes × 3)
387/// * `clusters` - Vector of clusters
388/// * `physics` - Physics parameters
389/// * `n_theta` - Number of integration points in theta direction
390/// * `n_phi` - Number of integration points in phi direction
391/// * `n_terms` - Number of expansion terms
392pub fn build_slfmm_system(
393    elements: &[Element],
394    nodes: &Array2<f64>,
395    clusters: &[Cluster],
396    physics: &PhysicsParams,
397    n_theta: usize,
398    n_phi: usize,
399    n_terms: usize,
400) -> SlfmmSystem {
401    let num_dofs = count_dofs(elements);
402    let num_clusters = clusters.len();
403    let num_sphere_points = n_theta * n_phi;
404
405    let mut system = SlfmmSystem::new(num_dofs, num_clusters, num_sphere_points, n_terms);
406
407    // Build cluster DOF mappings: for each cluster, collect the global DOF indices
408    // of elements that belong to this cluster
409    build_cluster_dof_mappings(&mut system, elements, clusters);
410
411    // Compute unit sphere quadrature points
412    let (sphere_coords, sphere_weights) = unit_sphere_quadrature(n_theta, n_phi);
413
414    // Build near-field matrix
415    build_near_field(&mut system, elements, nodes, clusters, physics);
416
417    // Build T-matrices (element-to-cluster expansion)
418    build_t_matrices(
419        &mut system,
420        elements,
421        clusters,
422        physics,
423        &sphere_coords,
424        &sphere_weights,
425    );
426
427    // Build D-matrices (cluster-to-cluster translation)
428    build_d_matrices(&mut system, clusters, physics, &sphere_coords, n_terms);
429
430    // Build S-matrices (cluster-to-element evaluation)
431    build_s_matrices(
432        &mut system,
433        elements,
434        clusters,
435        physics,
436        &sphere_coords,
437        &sphere_weights,
438    );
439
440    system
441}
442
443/// Build the mapping from clusters to global DOF indices
444fn build_cluster_dof_mappings(
445    system: &mut SlfmmSystem,
446    elements: &[Element],
447    clusters: &[Cluster],
448) {
449    for cluster in clusters {
450        let mut dof_indices = Vec::new();
451        for &elem_idx in &cluster.element_indices {
452            let elem = &elements[elem_idx];
453            if elem.property.is_evaluation() {
454                continue;
455            }
456            // Collect all DOF addresses for this element
457            dof_indices.extend(elem.dof_addresses.iter().copied());
458        }
459        system.cluster_dof_indices.push(dof_indices);
460    }
461}
462
463/// Count total number of DOFs
464fn count_dofs(elements: &[Element]) -> usize {
465    elements
466        .iter()
467        .filter(|e| !e.property.is_evaluation())
468        .map(|e| e.dof_addresses.len())
469        .sum()
470}
471
472/// Build near-field matrix blocks (parallelized)
473fn build_near_field(
474    system: &mut SlfmmSystem,
475    elements: &[Element],
476    nodes: &Array2<f64>,
477    clusters: &[Cluster],
478    physics: &PhysicsParams,
479) {
480    let gamma = Complex64::new(physics.gamma(), 0.0);
481    let tau = Complex64::new(physics.tau, 0.0);
482    let beta = physics.burton_miller_beta();
483
484    // Collect all cluster pairs that need processing
485    let mut cluster_pairs: Vec<(usize, usize, bool)> = Vec::new();
486
487    for (i, cluster_i) in clusters.iter().enumerate() {
488        // Self-interaction
489        cluster_pairs.push((i, i, true));
490
491        // Interaction with near clusters (only upper triangle)
492        for &j in &cluster_i.near_clusters {
493            if j > i {
494                cluster_pairs.push((i, j, false));
495            }
496        }
497    }
498
499    // Compute all near-field blocks in parallel
500    let near_blocks: Vec<NearFieldBlock> = parallel_map(&cluster_pairs, |&(i, j, is_self)| {
501        let cluster_i = &clusters[i];
502        let cluster_j = &clusters[j];
503
504        let mut block = compute_near_block(
505            elements,
506            nodes,
507            &cluster_i.element_indices,
508            &cluster_j.element_indices,
509            physics,
510            gamma,
511            tau,
512            beta,
513            is_self,
514        );
515
516        // Add free terms to diagonal for self-interaction blocks
517        if is_self {
518            for local_idx in 0..cluster_i.element_indices.len() {
519                let elem_idx = cluster_i.element_indices[local_idx];
520                let elem = &elements[elem_idx];
521                if elem.property.is_evaluation() {
522                    continue;
523                }
524                match &elem.boundary_condition {
525                    BoundaryCondition::Velocity(_)
526                    | BoundaryCondition::VelocityWithAdmittance { .. } => {
527                        block[[local_idx, local_idx]] += gamma * 0.5;
528                    }
529                    BoundaryCondition::Pressure(_) => {
530                        block[[local_idx, local_idx]] += beta * tau * 0.5;
531                    }
532                    _ => {}
533                }
534            }
535        }
536
537        NearFieldBlock {
538            source_cluster: i,
539            field_cluster: j,
540            coefficients: block,
541        }
542    });
543
544    system.near_matrix = near_blocks;
545}
546
547/// Compute a near-field block between two sets of elements
548fn compute_near_block(
549    elements: &[Element],
550    nodes: &Array2<f64>,
551    source_indices: &[usize],
552    field_indices: &[usize],
553    physics: &PhysicsParams,
554    gamma: Complex64,
555    tau: Complex64,
556    beta: Complex64,
557    is_self: bool,
558) -> Array2<Complex64> {
559    let n_source = source_indices.len();
560    let n_field = field_indices.len();
561    let mut block = Array2::zeros((n_source, n_field));
562
563    for (i, &src_idx) in source_indices.iter().enumerate() {
564        let source_elem = &elements[src_idx];
565        if source_elem.property.is_evaluation() {
566            continue;
567        }
568
569        for (j, &fld_idx) in field_indices.iter().enumerate() {
570            let field_elem = &elements[fld_idx];
571            if field_elem.property.is_evaluation() {
572                continue;
573            }
574
575            let element_coords = get_element_coords(field_elem, nodes);
576
577            let result = if is_self && src_idx == fld_idx {
578                // Singular integration
579                singular_integration(
580                    &source_elem.center,
581                    &source_elem.normal,
582                    &element_coords,
583                    field_elem.element_type,
584                    physics,
585                    None,
586                    0,
587                    false,
588                )
589            } else {
590                // Regular integration
591                regular_integration(
592                    &source_elem.center,
593                    &source_elem.normal,
594                    &element_coords,
595                    field_elem.element_type,
596                    field_elem.area,
597                    physics,
598                    None,
599                    0,
600                    false,
601                )
602            };
603
604            // Assemble using Burton-Miller formulation
605            let coeff = result.dg_dn_integral * gamma * tau + result.d2g_dnxdny_integral * beta;
606            block[[i, j]] = coeff;
607        }
608    }
609
610    block
611}
612
613/// Build T-matrices (element to cluster multipole expansion) - parallelized
614fn build_t_matrices(
615    system: &mut SlfmmSystem,
616    elements: &[Element],
617    clusters: &[Cluster],
618    physics: &PhysicsParams,
619    sphere_coords: &[[f64; 3]],
620    sphere_weights: &[f64],
621) {
622    let k = physics.wave_number;
623    let num_sphere_points = sphere_coords.len();
624
625    // Build all T-matrices in parallel
626    let t_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
627        let num_elem = cluster.element_indices.len();
628        let mut t_matrix = Array2::zeros((num_sphere_points, num_elem));
629
630        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
631            let elem = &elements[elem_idx];
632            if elem.property.is_evaluation() {
633                continue;
634            }
635
636            // Precompute difference vector
637            let diff: [f64; 3] = [
638                elem.center[0] - cluster.center[0],
639                elem.center[1] - cluster.center[1],
640                elem.center[2] - cluster.center[2],
641            ];
642
643            // For each integration direction on the unit sphere
644            for (p, coord) in sphere_coords.iter().enumerate() {
645                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
646                let exp_factor = Complex64::new((k * s_dot_diff).cos(), -(k * s_dot_diff).sin());
647                t_matrix[[p, j]] = exp_factor * sphere_weights[p];
648            }
649        }
650
651        t_matrix
652    });
653
654    system.t_matrices = t_matrices;
655}
656
657/// Build D-matrices (cluster to cluster translation) - parallelized
658///
659/// The D-matrix is diagonal in the single-level FMM, so we only store
660/// the diagonal entries. This reduces memory from O(P²) to O(P) per pair
661/// where P is the number of sphere integration points.
662fn build_d_matrices(
663    system: &mut SlfmmSystem,
664    clusters: &[Cluster],
665    physics: &PhysicsParams,
666    _sphere_coords: &[[f64; 3]],
667    n_terms: usize,
668) {
669    let k = physics.wave_number;
670    let num_sphere_points = system.num_sphere_points;
671
672    // Collect all far cluster pairs
673    let mut far_pairs: Vec<(usize, usize)> = Vec::new();
674    for (i, cluster_i) in clusters.iter().enumerate() {
675        for &j in &cluster_i.far_clusters {
676            far_pairs.push((i, j));
677        }
678    }
679
680    // Log memory estimate
681    let num_pairs = far_pairs.len();
682    let mem_per_pair = num_sphere_points * std::mem::size_of::<Complex64>();
683    let total_mem_mb = (num_pairs * mem_per_pair) as f64 / (1024.0 * 1024.0);
684    if total_mem_mb > 100.0 {
685        eprintln!(
686            "    D-matrices: {} far pairs × {} points = {:.1} MB",
687            num_pairs, num_sphere_points, total_mem_mb
688        );
689    }
690
691    // Compute all D-matrices in parallel (diagonal storage only)
692    let d_matrices: Vec<DMatrixEntry> = parallel_map(&far_pairs, |&(i, j)| {
693        let cluster_i = &clusters[i];
694        let cluster_j = &clusters[j];
695
696        // Distance vector between cluster centers
697        let diff = [
698            cluster_i.center[0] - cluster_j.center[0],
699            cluster_i.center[1] - cluster_j.center[1],
700            cluster_i.center[2] - cluster_j.center[2],
701        ];
702        let r = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]).sqrt();
703        let kr = k * r;
704
705        // Compute translation using spherical Hankel functions
706        let h_funcs = spherical_hankel_first_kind(n_terms.max(2), kr, 1.0);
707
708        // D-matrix is diagonal: D[p,p] = h_0(kr) * ik
709        // Store only the diagonal (all entries are the same in this simplified model)
710        let d_value = h_funcs[0] * Complex64::new(0.0, k);
711        let diagonal = Array1::from_elem(num_sphere_points, d_value);
712
713        DMatrixEntry {
714            source_cluster: i,
715            field_cluster: j,
716            diagonal,
717        }
718    });
719
720    system.d_matrices = d_matrices;
721}
722
723/// Build S-matrices (cluster multipole to element evaluation) - parallelized
724fn build_s_matrices(
725    system: &mut SlfmmSystem,
726    elements: &[Element],
727    clusters: &[Cluster],
728    physics: &PhysicsParams,
729    sphere_coords: &[[f64; 3]],
730    sphere_weights: &[f64],
731) {
732    let k = physics.wave_number;
733    let num_sphere_points = sphere_coords.len();
734
735    // Build all S-matrices in parallel
736    let s_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
737        let num_elem = cluster.element_indices.len();
738        let mut s_matrix = Array2::zeros((num_elem, num_sphere_points));
739
740        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
741            let elem = &elements[elem_idx];
742            if elem.property.is_evaluation() {
743                continue;
744            }
745
746            // Precompute difference vector
747            let diff: [f64; 3] = [
748                elem.center[0] - cluster.center[0],
749                elem.center[1] - cluster.center[1],
750                elem.center[2] - cluster.center[2],
751            ];
752
753            // For each integration direction on the unit sphere
754            for (p, coord) in sphere_coords.iter().enumerate() {
755                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
756                let exp_factor = Complex64::new((k * s_dot_diff).cos(), (k * s_dot_diff).sin());
757                s_matrix[[j, p]] = exp_factor * sphere_weights[p];
758            }
759        }
760
761        s_matrix
762    });
763
764    system.s_matrices = s_matrices;
765}
766
767/// Get element node coordinates as Array2
768fn get_element_coords(element: &Element, nodes: &Array2<f64>) -> Array2<f64> {
769    let num_nodes = element.connectivity.len();
770    let mut coords = Array2::zeros((num_nodes, 3));
771
772    for (i, &node_idx) in element.connectivity.iter().enumerate() {
773        for j in 0..3 {
774            coords[[i, j]] = nodes[[node_idx, j]];
775        }
776    }
777
778    coords
779}
780
781#[cfg(test)]
782mod tests {
783    use super::*;
784    use crate::core::types::{BoundaryCondition, ElementProperty, ElementType};
785    use ndarray::array;
786
787    fn make_test_cluster() -> Cluster {
788        let mut cluster = Cluster::new(array![0.5, 0.5, 0.0]);
789        cluster.element_indices = vec![0, 1];
790        cluster.near_clusters = vec![];
791        cluster.far_clusters = vec![];
792        cluster
793    }
794
795    fn make_test_elements() -> (Vec<Element>, Array2<f64>) {
796        let nodes = Array2::from_shape_vec(
797            (4, 3),
798            vec![0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.5, 1.0, 0.0],
799        )
800        .unwrap();
801
802        let elem0 = Element {
803            connectivity: vec![0, 1, 2],
804            element_type: ElementType::Tri3,
805            property: ElementProperty::Surface,
806            normal: array![0.0, 0.0, 1.0],
807            node_normals: Array2::zeros((3, 3)),
808            center: array![0.5, 1.0 / 3.0, 0.0],
809            area: 0.5,
810            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(1.0, 0.0)]),
811            group: 0,
812            dof_addresses: vec![0],
813        };
814
815        let elem1 = Element {
816            connectivity: vec![1, 3, 2],
817            element_type: ElementType::Tri3,
818            property: ElementProperty::Surface,
819            normal: array![0.0, 0.0, 1.0],
820            node_normals: Array2::zeros((3, 3)),
821            center: array![1.0, 2.0 / 3.0, 0.0],
822            area: 0.5,
823            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(0.0, 0.0)]),
824            group: 0,
825            dof_addresses: vec![1],
826        };
827
828        (vec![elem0, elem1], nodes)
829    }
830
831    #[test]
832    fn test_slfmm_system_creation() {
833        let system = SlfmmSystem::new(10, 2, 32, 5);
834        assert_eq!(system.num_dofs, 10);
835        assert_eq!(system.num_sphere_points, 32);
836        assert_eq!(system.num_expansion_terms, 5);
837    }
838
839    #[test]
840    fn test_build_slfmm_system() {
841        let (elements, nodes) = make_test_elements();
842        let cluster = make_test_cluster();
843        let clusters = vec![cluster];
844        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
845
846        let system = build_slfmm_system(&elements, &nodes, &clusters, &physics, 4, 8, 5);
847
848        assert_eq!(system.num_dofs, 2);
849        assert_eq!(system.t_matrices.len(), 1);
850        assert_eq!(system.s_matrices.len(), 1);
851    }
852
853    #[test]
854    fn test_near_field_block() {
855        let (elements, nodes) = make_test_elements();
856        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
857        let gamma = Complex64::new(physics.gamma(), 0.0);
858        let tau = Complex64::new(physics.tau, 0.0);
859        let beta = physics.burton_miller_beta();
860
861        let block = compute_near_block(
862            &elements,
863            &nodes,
864            &[0, 1],
865            &[0, 1],
866            &physics,
867            gamma,
868            tau,
869            beta,
870            true,
871        );
872
873        assert_eq!(block.shape(), &[2, 2]);
874        // Diagonal entries should be non-zero
875        assert!(block[[0, 0]].norm() > 0.0);
876        assert!(block[[1, 1]].norm() > 0.0);
877    }
878}