Skip to main content

math_audio_bem/core/assembly/
slfmm.rs

1//! Single-Level Fast Multipole Method (SLFMM) assembly
2//!
3//! Direct port of NC_BuildSystemFMBEM (SLFMM mode) from NC_EquationSystem.cpp.
4//!
5//! The SLFMM decomposes the BEM interaction matrix as:
6//! ```text
7//! [A] = [N] + [S][D][T]
8//! ```
9//! where:
10//! - \[N\] is the near-field matrix (direct interactions between nearby elements)
11//! - \[T\] is the T-matrix (element-to-cluster multipole expansion)
12//! - \[D\] is the D-matrix (cluster-to-cluster translation)
13//! - \[S\] is the S-matrix (cluster-to-element local expansion)
14//!
15//! **Note**: This module requires either the `native` or `wasm` feature for parallel processing.
16//! Parallelism is provided via rayon (native) or wasm-bindgen-rayon (WASM).
17
18#![cfg(any(feature = "native", feature = "wasm"))]
19
20use ndarray::{Array1, Array2};
21use num_complex::Complex64;
22
23use crate::core::parallel::{parallel_enumerate_filter_map, parallel_flat_map, parallel_map};
24
25use crate::core::integration::{regular_integration, singular_integration, unit_sphere_quadrature};
26use crate::core::types::{BoundaryCondition, Cluster, Element, PhysicsParams};
27use math_audio_wave::special::spherical_hankel_first_kind;
28
29/// Result of SLFMM assembly
30#[derive(Clone)]
31pub struct SlfmmSystem {
32    /// Near-field coefficient matrix (sparse, stored as dense blocks)
33    pub near_matrix: Vec<NearFieldBlock>,
34    /// T-matrix for each cluster (element DOFs → multipole expansion)
35    pub t_matrices: Vec<Array2<Complex64>>,
36    /// T-vector for RHS contribution
37    pub t_vector: Array1<Complex64>,
38    /// D-matrix entries for far cluster pairs
39    pub d_matrices: Vec<DMatrixEntry>,
40    /// S-matrix for each cluster (multipole expansion → field DOFs)
41    pub s_matrices: Vec<Array2<Complex64>>,
42    /// RHS vector
43    pub rhs: Array1<Complex64>,
44    /// Number of DOFs
45    pub num_dofs: usize,
46    /// Number of integration points on unit sphere
47    pub num_sphere_points: usize,
48    /// Number of expansion terms
49    pub num_expansion_terms: usize,
50    /// Number of clusters
51    pub num_clusters: usize,
52    /// Cluster DOF mappings: for each cluster, the global DOF indices
53    pub cluster_dof_indices: Vec<Vec<usize>>,
54}
55
56/// Near-field block between two clusters
57#[derive(Debug, Clone)]
58pub struct NearFieldBlock {
59    /// Source cluster index
60    pub source_cluster: usize,
61    /// Field cluster index
62    pub field_cluster: usize,
63    /// Dense coefficient matrix for this block
64    pub coefficients: Array2<Complex64>,
65}
66
67/// D-matrix entry for a far cluster pair
68///
69/// The D-matrix for SLFMM is diagonal (same multipole index in source and field),
70/// so we store only the diagonal entries to save memory.
71/// This reduces storage from O(n²) to O(n) per cluster pair.
72#[derive(Debug, Clone)]
73pub struct DMatrixEntry {
74    /// Source cluster index
75    pub source_cluster: usize,
76    /// Field cluster index
77    pub field_cluster: usize,
78    /// Diagonal translation coefficients (length = num_sphere_points)
79    /// The full D-matrix would be: `D[p,q] = diagonal[p]` if p == q, else 0
80    pub diagonal: Array1<Complex64>,
81}
82
83impl SlfmmSystem {
84    /// Create a new empty SLFMM system
85    pub fn new(
86        num_dofs: usize,
87        num_clusters: usize,
88        num_sphere_points: usize,
89        num_expansion_terms: usize,
90    ) -> Self {
91        Self {
92            near_matrix: Vec::new(),
93            t_matrices: Vec::with_capacity(num_clusters),
94            t_vector: Array1::zeros(num_sphere_points * num_clusters),
95            d_matrices: Vec::new(),
96            s_matrices: Vec::with_capacity(num_clusters),
97            rhs: Array1::zeros(num_dofs),
98            num_dofs,
99            num_sphere_points,
100            num_expansion_terms,
101            num_clusters,
102            cluster_dof_indices: Vec::with_capacity(num_clusters),
103        }
104    }
105
106    /// Extract the near-field matrix as a dense matrix for preconditioning
107    ///
108    /// This assembles only the near-field blocks into a dense matrix,
109    /// which is much smaller than the full matrix (O(N) vs O(N²) entries).
110    pub fn extract_near_field_matrix(&self) -> Array2<Complex64> {
111        let mut matrix = Array2::zeros((self.num_dofs, self.num_dofs));
112
113        for block in &self.near_matrix {
114            let src_dofs = &self.cluster_dof_indices[block.source_cluster];
115            let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
116
117            // Copy block to global matrix
118            for (local_i, &global_i) in src_dofs.iter().enumerate() {
119                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
120                    matrix[[global_i, global_j]] += block.coefficients[[local_i, local_j]];
121                }
122            }
123
124            // Handle symmetric storage: also fill the (fld, src) part
125            if block.source_cluster != block.field_cluster {
126                for (local_i, &global_i) in src_dofs.iter().enumerate() {
127                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
128                        matrix[[global_j, global_i]] += block.coefficients[[local_i, local_j]];
129                    }
130                }
131            }
132        }
133
134        matrix
135    }
136
137    /// Apply the SLFMM operator: y = (\[N\] + \[S\]\[D\]\[T\]) * x
138    ///
139    /// This is used in iterative solvers (CGS, BiCGSTAB).
140    ///
141    /// The decomposition is:
142    /// - Near-field: Direct element-to-element interactions for nearby clusters
143    /// - Far-field: Multipole expansions with S*D*T factorization
144    ///
145    /// # Arguments
146    /// * `x` - Input vector (length = num_dofs)
147    ///
148    /// # Returns
149    /// * `y` - Output vector (length = num_dofs), y = A*x
150    pub fn matvec(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
151        // === Near-field contribution: y += [N] * x (parallelized) ===
152        // Compute all near-field block contributions in parallel, then sum them
153        let near_contributions: Vec<Vec<(usize, Complex64)>> =
154            parallel_flat_map(&self.near_matrix, |block| {
155                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
156                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
157
158                let mut contributions = Vec::new();
159
160                // Gather x values from field cluster
161                let x_local: Array1<Complex64> = Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
162
163                // Apply block matrix
164                let y_local = block.coefficients.dot(&x_local);
165
166                // Collect contributions for source DOFs
167                for (local_i, &global_i) in src_dofs.iter().enumerate() {
168                    contributions.push((global_i, y_local[local_i]));
169                }
170
171                // Handle symmetric storage
172                if block.source_cluster != block.field_cluster {
173                    let x_src: Array1<Complex64> =
174                        Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
175                    let y_fld = block.coefficients.t().dot(&x_src);
176                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
177                        contributions.push((global_j, y_fld[local_j]));
178                    }
179                }
180
181                vec![contributions]
182            });
183
184        // Accumulate near-field contributions
185        let mut y = Array1::zeros(self.num_dofs);
186        for contributions in near_contributions {
187            for (idx, val) in contributions {
188                y[idx] += val;
189            }
190        }
191
192        // === Far-field contribution: y += [S][D][T] * x (parallelized) ===
193
194        // Step 1: Compute multipole expansions for each cluster in parallel: t[c] = T[c] * x[c]
195        let multipoles: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
196            &self.t_matrices,
197            |cluster_idx, t_mat| {
198                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
199                if cluster_dofs.is_empty() || t_mat.is_empty() {
200                    Array1::zeros(self.num_sphere_points)
201                } else {
202                    let x_local: Array1<Complex64> =
203                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
204                    t_mat.dot(&x_local)
205                }
206            },
207        );
208
209        // Step 2: Translate multipoles between far clusters (parallelized)
210        // Compute all D*multipole products in parallel
211        // D-matrix is diagonal, so D*x is just element-wise multiplication
212        let d_contributions: Vec<(usize, Array1<Complex64>)> =
213            parallel_map(&self.d_matrices, |d_entry| {
214                let src_mult = &multipoles[d_entry.source_cluster];
215                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * src_mult[i]
216                let translated = &d_entry.diagonal * src_mult;
217                (d_entry.field_cluster, translated)
218            });
219
220        // Accumulate D-matrix contributions into locals (sequential to avoid race)
221        let mut locals: Vec<Array1<Complex64>> = (0..self.num_clusters)
222            .map(|_| Array1::zeros(self.num_sphere_points))
223            .collect();
224
225        for (field_cluster, translated) in d_contributions {
226            for i in 0..self.num_sphere_points {
227                locals[field_cluster][i] += translated[i];
228            }
229        }
230
231        // Step 3: Evaluate locals at field points in parallel: y[c] += S[c] * locals[c]
232        let far_contributions: Vec<Vec<(usize, Complex64)>> =
233            parallel_enumerate_filter_map(&self.s_matrices, |cluster_idx, s_mat| {
234                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
235                if cluster_dofs.is_empty() || s_mat.is_empty() {
236                    return None;
237                }
238                let y_local = s_mat.dot(&locals[cluster_idx]);
239                let contributions: Vec<(usize, Complex64)> = cluster_dofs
240                    .iter()
241                    .enumerate()
242                    .map(|(local_j, &global_j)| (global_j, y_local[local_j]))
243                    .collect();
244                Some(contributions)
245            });
246
247        // Accumulate far-field contributions
248        for contributions in far_contributions {
249            for (idx, val) in contributions {
250                y[idx] += val;
251            }
252        }
253
254        y
255    }
256
257    /// Apply the SLFMM operator transpose: y = A^T * x (parallelized)
258    ///
259    /// Used by some iterative solvers (e.g., BiCGSTAB).
260    pub fn matvec_transpose(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
261        // === Near-field contribution (transposed, parallelized) ===
262        let near_contributions: Vec<Vec<(usize, Complex64)>> =
263            parallel_flat_map(&self.near_matrix, |block| {
264                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
265                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
266
267                let mut contributions = Vec::new();
268
269                // For transpose: gather from source DOFs, scatter to field DOFs
270                let x_local: Array1<Complex64> = Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
271                let y_local = block.coefficients.t().dot(&x_local);
272                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
273                    contributions.push((global_j, y_local[local_j]));
274                }
275
276                // Symmetric storage
277                if block.source_cluster != block.field_cluster {
278                    let x_fld: Array1<Complex64> =
279                        Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
280                    let y_src = block.coefficients.dot(&x_fld);
281                    for (local_i, &global_i) in src_dofs.iter().enumerate() {
282                        contributions.push((global_i, y_src[local_i]));
283                    }
284                }
285
286                vec![contributions]
287            });
288
289        // Accumulate near-field contributions
290        let mut y = Array1::zeros(self.num_dofs);
291        for contributions in near_contributions {
292            for (idx, val) in contributions {
293                y[idx] += val;
294            }
295        }
296
297        // === Far-field contribution (transposed, parallelized): y += T^T * D^T * S^T * x ===
298
299        // Step 1: S^T * x (parallelized)
300        let locals: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
301            &self.s_matrices,
302            |cluster_idx, s_mat| {
303                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
304                if cluster_dofs.is_empty() || s_mat.is_empty() {
305                    Array1::zeros(self.num_sphere_points)
306                } else {
307                    let x_local: Array1<Complex64> =
308                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
309                    s_mat.t().dot(&x_local)
310                }
311            },
312        );
313
314        // Step 2: D^T translation (parallelized)
315        // D-matrix is diagonal, so D^T = D (diagonal matrices are symmetric)
316        let d_contributions: Vec<(usize, Array1<Complex64>)> =
317            parallel_map(&self.d_matrices, |d_entry| {
318                let fld_local = &locals[d_entry.field_cluster];
319                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * fld_local[i]
320                let translated = &d_entry.diagonal * fld_local;
321                (d_entry.source_cluster, translated)
322            });
323
324        // Accumulate D-matrix contributions
325        let mut multipoles: Vec<Array1<Complex64>> = (0..self.num_clusters)
326            .map(|_| Array1::zeros(self.num_sphere_points))
327            .collect();
328
329        for (source_cluster, translated) in d_contributions {
330            for i in 0..self.num_sphere_points {
331                multipoles[source_cluster][i] += translated[i];
332            }
333        }
334
335        // Step 3: T^T * multipoles (parallelized)
336        let far_contributions: Vec<Vec<(usize, Complex64)>> =
337            parallel_enumerate_filter_map(&self.t_matrices, |cluster_idx, t_mat| {
338                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
339                if cluster_dofs.is_empty() || t_mat.is_empty() {
340                    return None;
341                }
342                let y_local = t_mat.t().dot(&multipoles[cluster_idx]);
343                let contributions: Vec<(usize, Complex64)> = cluster_dofs
344                    .iter()
345                    .enumerate()
346                    .map(|(local_i, &global_i)| (global_i, y_local[local_i]))
347                    .collect();
348                Some(contributions)
349            });
350
351        // Accumulate far-field contributions
352        for contributions in far_contributions {
353            for (idx, val) in contributions {
354                y[idx] += val;
355            }
356        }
357
358        y
359    }
360}
361
362use math_audio_solvers::traits::LinearOperator;
363
364impl LinearOperator<Complex64> for SlfmmSystem {
365    fn num_rows(&self) -> usize {
366        self.num_dofs
367    }
368
369    fn num_cols(&self) -> usize {
370        self.num_dofs
371    }
372
373    fn apply(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
374        self.matvec(x)
375    }
376
377    fn apply_transpose(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
378        self.matvec_transpose(x)
379    }
380}
381
382/// Build the SLFMM system matrices
383///
384/// # Arguments
385/// * `elements` - Vector of mesh elements
386/// * `nodes` - Node coordinates (num_nodes × 3)
387/// * `clusters` - Vector of clusters
388/// * `physics` - Physics parameters
389/// * `n_theta` - Number of integration points in theta direction
390/// * `n_phi` - Number of integration points in phi direction
391/// * `n_terms` - Number of expansion terms
392pub fn build_slfmm_system(
393    elements: &[Element],
394    nodes: &Array2<f64>,
395    clusters: &[Cluster],
396    physics: &PhysicsParams,
397    n_theta: usize,
398    n_phi: usize,
399    n_terms: usize,
400) -> SlfmmSystem {
401    let num_dofs = count_dofs(elements);
402    let num_clusters = clusters.len();
403    let num_sphere_points = n_theta * n_phi;
404
405    let mut system = SlfmmSystem::new(num_dofs, num_clusters, num_sphere_points, n_terms);
406
407    // Build cluster DOF mappings: for each cluster, collect the global DOF indices
408    // of elements that belong to this cluster
409    build_cluster_dof_mappings(&mut system, elements, clusters);
410
411    // Compute unit sphere quadrature points
412    let (sphere_coords, sphere_weights) = unit_sphere_quadrature(n_theta, n_phi);
413
414    // Build near-field matrix
415    build_near_field(&mut system, elements, nodes, clusters, physics);
416
417    // Build T-matrices (element-to-cluster expansion)
418    build_t_matrices(
419        &mut system,
420        elements,
421        clusters,
422        physics,
423        &sphere_coords,
424        &sphere_weights,
425    );
426
427    // Build D-matrices (cluster-to-cluster translation)
428    build_d_matrices(&mut system, clusters, physics, &sphere_coords, n_terms);
429
430    // Build S-matrices (cluster-to-element evaluation)
431    build_s_matrices(
432        &mut system,
433        elements,
434        clusters,
435        physics,
436        &sphere_coords,
437        &sphere_weights,
438    );
439
440    system
441}
442
443/// Build the mapping from clusters to global DOF indices
444fn build_cluster_dof_mappings(
445    system: &mut SlfmmSystem,
446    elements: &[Element],
447    clusters: &[Cluster],
448) {
449    for cluster in clusters {
450        let mut dof_indices = Vec::new();
451        for &elem_idx in &cluster.element_indices {
452            let elem = &elements[elem_idx];
453            if elem.property.is_evaluation() {
454                continue;
455            }
456            // Collect all DOF addresses for this element
457            dof_indices.extend(elem.dof_addresses.iter().copied());
458        }
459        system.cluster_dof_indices.push(dof_indices);
460    }
461}
462
463/// Count total number of DOFs
464fn count_dofs(elements: &[Element]) -> usize {
465    elements
466        .iter()
467        .filter(|e| !e.property.is_evaluation())
468        .map(|e| e.dof_addresses.len())
469        .sum()
470}
471
472/// Build near-field matrix blocks (parallelized)
473fn build_near_field(
474    system: &mut SlfmmSystem,
475    elements: &[Element],
476    nodes: &Array2<f64>,
477    clusters: &[Cluster],
478    physics: &PhysicsParams,
479) {
480    let gamma = Complex64::new(physics.gamma(), 0.0);
481    let tau = Complex64::new(physics.tau, 0.0);
482    let beta = physics.burton_miller_beta();
483
484    // Collect all cluster pairs that need processing
485    let mut cluster_pairs: Vec<(usize, usize, bool)> = Vec::new();
486
487    for (i, cluster_i) in clusters.iter().enumerate() {
488        // Self-interaction
489        cluster_pairs.push((i, i, true));
490
491        // Interaction with near clusters
492        // Note: near_clusters lists must be symmetric, but we guard against asymmetry
493        // by checking both directions and avoiding duplicates
494        for &j in &cluster_i.near_clusters {
495            if j != i {
496                // Add pair regardless of i < j since near-lists might be asymmetric
497                // The pair (i,j) represents computing block from cluster j to cluster i
498                cluster_pairs.push((i, j, false));
499            }
500        }
501    }
502
503    // Compute all near-field blocks in parallel
504    let near_blocks: Vec<NearFieldBlock> = parallel_map(&cluster_pairs, |&(i, j, is_self)| {
505        let cluster_i = &clusters[i];
506        let cluster_j = &clusters[j];
507
508        let mut block = compute_near_block(
509            elements,
510            nodes,
511            &cluster_i.element_indices,
512            &cluster_j.element_indices,
513            physics,
514            gamma,
515            tau,
516            beta,
517            is_self,
518        );
519
520        // Add free terms to diagonal for self-interaction blocks
521        if is_self {
522            for local_idx in 0..cluster_i.element_indices.len() {
523                let elem_idx = cluster_i.element_indices[local_idx];
524                let elem = &elements[elem_idx];
525                if elem.property.is_evaluation() {
526                    continue;
527                }
528                match &elem.boundary_condition {
529                    BoundaryCondition::Velocity(_)
530                    | BoundaryCondition::VelocityWithAdmittance { .. } => {
531                        block[[local_idx, local_idx]] += gamma * 0.5;
532                    }
533                    BoundaryCondition::Pressure(_) => {
534                        block[[local_idx, local_idx]] += beta * tau * 0.5;
535                    }
536                    _ => {}
537                }
538            }
539        }
540
541        NearFieldBlock {
542            source_cluster: i,
543            field_cluster: j,
544            coefficients: block,
545        }
546    });
547
548    system.near_matrix = near_blocks;
549}
550
551/// Compute a near-field block between two sets of elements
552fn compute_near_block(
553    elements: &[Element],
554    nodes: &Array2<f64>,
555    source_indices: &[usize],
556    field_indices: &[usize],
557    physics: &PhysicsParams,
558    gamma: Complex64,
559    tau: Complex64,
560    beta: Complex64,
561    is_self: bool,
562) -> Array2<Complex64> {
563    let n_source = source_indices.len();
564    let n_field = field_indices.len();
565    let mut block = Array2::zeros((n_source, n_field));
566
567    for (i, &src_idx) in source_indices.iter().enumerate() {
568        let source_elem = &elements[src_idx];
569        if source_elem.property.is_evaluation() {
570            continue;
571        }
572
573        for (j, &fld_idx) in field_indices.iter().enumerate() {
574            let field_elem = &elements[fld_idx];
575            if field_elem.property.is_evaluation() {
576                continue;
577            }
578
579            let element_coords = get_element_coords(field_elem, nodes);
580
581            let result = if is_self && src_idx == fld_idx {
582                // Singular integration
583                singular_integration(
584                    &source_elem.center,
585                    &source_elem.normal,
586                    &element_coords,
587                    field_elem.element_type,
588                    physics,
589                    None,
590                    0,
591                    false,
592                )
593            } else {
594                // Regular integration
595                regular_integration(
596                    &source_elem.center,
597                    &source_elem.normal,
598                    &element_coords,
599                    field_elem.element_type,
600                    field_elem.area,
601                    physics,
602                    None,
603                    0,
604                    false,
605                )
606            };
607
608            // Assemble using Burton-Miller formulation
609            let coeff = result.dg_dn_integral * gamma * tau + result.d2g_dnxdny_integral * beta;
610            block[[i, j]] = coeff;
611        }
612    }
613
614    block
615}
616
617/// Build T-matrices (element to cluster multipole expansion) - parallelized
618fn build_t_matrices(
619    system: &mut SlfmmSystem,
620    elements: &[Element],
621    clusters: &[Cluster],
622    physics: &PhysicsParams,
623    sphere_coords: &[[f64; 3]],
624    sphere_weights: &[f64],
625) {
626    let k = physics.wave_number;
627    let num_sphere_points = sphere_coords.len();
628
629    // Build all T-matrices in parallel
630    let t_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
631        let num_elem = cluster.element_indices.len();
632        let mut t_matrix = Array2::zeros((num_sphere_points, num_elem));
633
634        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
635            let elem = &elements[elem_idx];
636            if elem.property.is_evaluation() {
637                continue;
638            }
639
640            // Precompute difference vector
641            let diff: [f64; 3] = [
642                elem.center[0] - cluster.center[0],
643                elem.center[1] - cluster.center[1],
644                elem.center[2] - cluster.center[2],
645            ];
646
647            // For each integration direction on the unit sphere
648            for (p, coord) in sphere_coords.iter().enumerate() {
649                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
650                let exp_factor = Complex64::new((k * s_dot_diff).cos(), -(k * s_dot_diff).sin());
651                t_matrix[[p, j]] = exp_factor * sphere_weights[p];
652            }
653        }
654
655        t_matrix
656    });
657
658    system.t_matrices = t_matrices;
659}
660
661/// Build D-matrices (cluster to cluster translation) - parallelized
662///
663/// The D-matrix is diagonal in the single-level FMM, so we only store
664/// the diagonal entries. This reduces memory from O(P²) to O(P) per pair
665/// where P is the number of sphere integration points.
666fn build_d_matrices(
667    system: &mut SlfmmSystem,
668    clusters: &[Cluster],
669    physics: &PhysicsParams,
670    _sphere_coords: &[[f64; 3]],
671    n_terms: usize,
672) {
673    let k = physics.wave_number;
674    let num_sphere_points = system.num_sphere_points;
675
676    // Collect all far cluster pairs
677    let mut far_pairs: Vec<(usize, usize)> = Vec::new();
678    for (i, cluster_i) in clusters.iter().enumerate() {
679        for &j in &cluster_i.far_clusters {
680            far_pairs.push((i, j));
681        }
682    }
683
684    // Log memory estimate
685    let num_pairs = far_pairs.len();
686    let mem_per_pair = num_sphere_points * std::mem::size_of::<Complex64>();
687    let total_mem_mb = (num_pairs * mem_per_pair) as f64 / (1024.0 * 1024.0);
688    if total_mem_mb > 100.0 {
689        eprintln!(
690            "    D-matrices: {} far pairs × {} points = {:.1} MB",
691            num_pairs, num_sphere_points, total_mem_mb
692        );
693    }
694
695    // Compute all D-matrices in parallel (diagonal storage only)
696    let d_matrices: Vec<DMatrixEntry> = parallel_map(&far_pairs, |&(i, j)| {
697        let cluster_i = &clusters[i];
698        let cluster_j = &clusters[j];
699
700        // Distance vector between cluster centers
701        let diff = [
702            cluster_i.center[0] - cluster_j.center[0],
703            cluster_i.center[1] - cluster_j.center[1],
704            cluster_i.center[2] - cluster_j.center[2],
705        ];
706        let r = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]).sqrt();
707        let kr = k * r;
708
709        // Compute translation using spherical Hankel functions
710        let h_funcs = spherical_hankel_first_kind(n_terms.max(2), kr, 1.0);
711
712        // D-matrix is diagonal: D[p,p] = h_0(kr) * ik
713        // Store only the diagonal (all entries are the same in this simplified model)
714        let d_value = h_funcs[0] * Complex64::new(0.0, k);
715        let diagonal = Array1::from_elem(num_sphere_points, d_value);
716
717        DMatrixEntry {
718            source_cluster: i,
719            field_cluster: j,
720            diagonal,
721        }
722    });
723
724    system.d_matrices = d_matrices;
725}
726
727/// Build S-matrices (cluster multipole to element evaluation) - parallelized
728fn build_s_matrices(
729    system: &mut SlfmmSystem,
730    elements: &[Element],
731    clusters: &[Cluster],
732    physics: &PhysicsParams,
733    sphere_coords: &[[f64; 3]],
734    sphere_weights: &[f64],
735) {
736    let k = physics.wave_number;
737    let num_sphere_points = sphere_coords.len();
738
739    // Build all S-matrices in parallel
740    let s_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
741        let num_elem = cluster.element_indices.len();
742        let mut s_matrix = Array2::zeros((num_elem, num_sphere_points));
743
744        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
745            let elem = &elements[elem_idx];
746            if elem.property.is_evaluation() {
747                continue;
748            }
749
750            // Precompute difference vector
751            let diff: [f64; 3] = [
752                elem.center[0] - cluster.center[0],
753                elem.center[1] - cluster.center[1],
754                elem.center[2] - cluster.center[2],
755            ];
756
757            // For each integration direction on the unit sphere
758            for (p, coord) in sphere_coords.iter().enumerate() {
759                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
760                let exp_factor = Complex64::new((k * s_dot_diff).cos(), (k * s_dot_diff).sin());
761                s_matrix[[j, p]] = exp_factor * sphere_weights[p];
762            }
763        }
764
765        s_matrix
766    });
767
768    system.s_matrices = s_matrices;
769}
770
771/// Get element node coordinates as Array2
772fn get_element_coords(element: &Element, nodes: &Array2<f64>) -> Array2<f64> {
773    let num_nodes = element.connectivity.len();
774    let mut coords = Array2::zeros((num_nodes, 3));
775
776    for (i, &node_idx) in element.connectivity.iter().enumerate() {
777        for j in 0..3 {
778            coords[[i, j]] = nodes[[node_idx, j]];
779        }
780    }
781
782    coords
783}
784
785#[cfg(test)]
786mod tests {
787    use super::*;
788    use crate::core::types::{BoundaryCondition, ElementProperty, ElementType};
789    use ndarray::array;
790
791    fn make_test_cluster() -> Cluster {
792        let mut cluster = Cluster::new(array![0.5, 0.5, 0.0]);
793        cluster.element_indices = vec![0, 1];
794        cluster.near_clusters = vec![];
795        cluster.far_clusters = vec![];
796        cluster
797    }
798
799    fn make_test_elements() -> (Vec<Element>, Array2<f64>) {
800        let nodes = Array2::from_shape_vec(
801            (4, 3),
802            vec![0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.5, 1.0, 0.0],
803        )
804        .unwrap();
805
806        let elem0 = Element {
807            connectivity: vec![0, 1, 2],
808            element_type: ElementType::Tri3,
809            property: ElementProperty::Surface,
810            normal: array![0.0, 0.0, 1.0],
811            node_normals: Array2::zeros((3, 3)),
812            center: array![0.5, 1.0 / 3.0, 0.0],
813            area: 0.5,
814            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(1.0, 0.0)]),
815            group: 0,
816            dof_addresses: vec![0],
817        };
818
819        let elem1 = Element {
820            connectivity: vec![1, 3, 2],
821            element_type: ElementType::Tri3,
822            property: ElementProperty::Surface,
823            normal: array![0.0, 0.0, 1.0],
824            node_normals: Array2::zeros((3, 3)),
825            center: array![1.0, 2.0 / 3.0, 0.0],
826            area: 0.5,
827            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(0.0, 0.0)]),
828            group: 0,
829            dof_addresses: vec![1],
830        };
831
832        (vec![elem0, elem1], nodes)
833    }
834
835    #[test]
836    fn test_slfmm_system_creation() {
837        let system = SlfmmSystem::new(10, 2, 32, 5);
838        assert_eq!(system.num_dofs, 10);
839        assert_eq!(system.num_sphere_points, 32);
840        assert_eq!(system.num_expansion_terms, 5);
841    }
842
843    #[test]
844    fn test_build_slfmm_system() {
845        let (elements, nodes) = make_test_elements();
846        let cluster = make_test_cluster();
847        let clusters = vec![cluster];
848        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
849
850        let system = build_slfmm_system(&elements, &nodes, &clusters, &physics, 4, 8, 5);
851
852        assert_eq!(system.num_dofs, 2);
853        assert_eq!(system.t_matrices.len(), 1);
854        assert_eq!(system.s_matrices.len(), 1);
855    }
856
857    #[test]
858    fn test_near_field_block() {
859        let (elements, nodes) = make_test_elements();
860        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
861        let gamma = Complex64::new(physics.gamma(), 0.0);
862        let tau = Complex64::new(physics.tau, 0.0);
863        let beta = physics.burton_miller_beta();
864
865        let block = compute_near_block(
866            &elements,
867            &nodes,
868            &[0, 1],
869            &[0, 1],
870            &physics,
871            gamma,
872            tau,
873            beta,
874            true,
875        );
876
877        assert_eq!(block.shape(), &[2, 2]);
878        // Diagonal entries should be non-zero
879        assert!(block[[0, 0]].norm() > 0.0);
880        assert!(block[[1, 1]].norm() > 0.0);
881    }
882}