bem/core/assembly/
slfmm.rs

1//! Single-Level Fast Multipole Method (SLFMM) assembly
2//!
3//! Direct port of NC_BuildSystemFMBEM (SLFMM mode) from NC_EquationSystem.cpp.
4//!
5//! The SLFMM decomposes the BEM interaction matrix as:
6//! ```text
7//! [A] = [N] + [S][D][T]
8//! ```
9//! where:
10//! - [N] is the near-field matrix (direct interactions between nearby elements)
11//! - [T] is the T-matrix (element-to-cluster multipole expansion)
12//! - [D] is the D-matrix (cluster-to-cluster translation)
13//! - [S] is the S-matrix (cluster-to-element local expansion)
14//!
15//! **Note**: This module requires either the `native` or `wasm` feature for parallel processing.
16//! Parallelism is provided via rayon (native) or wasm-bindgen-rayon (WASM).
17
18#![cfg(any(feature = "native", feature = "wasm"))]
19
20use ndarray::{Array1, Array2};
21use num_complex::Complex64;
22
23use crate::core::parallel::{parallel_enumerate_filter_map, parallel_flat_map, parallel_map};
24
25use crate::core::integration::{regular_integration, singular_integration, unit_sphere_quadrature};
26use crate::core::types::{BoundaryCondition, Cluster, Element, PhysicsParams};
27use math_wave::special::spherical_hankel_first_kind;
28
29/// Result of SLFMM assembly
30#[derive(Clone)]
31pub struct SlfmmSystem {
32    /// Near-field coefficient matrix (sparse, stored as dense blocks)
33    pub near_matrix: Vec<NearFieldBlock>,
34    /// T-matrix for each cluster (element DOFs → multipole expansion)
35    pub t_matrices: Vec<Array2<Complex64>>,
36    /// T-vector for RHS contribution
37    pub t_vector: Array1<Complex64>,
38    /// D-matrix entries for far cluster pairs
39    pub d_matrices: Vec<DMatrixEntry>,
40    /// S-matrix for each cluster (multipole expansion → field DOFs)
41    pub s_matrices: Vec<Array2<Complex64>>,
42    /// RHS vector
43    pub rhs: Array1<Complex64>,
44    /// Number of DOFs
45    pub num_dofs: usize,
46    /// Number of integration points on unit sphere
47    pub num_sphere_points: usize,
48    /// Number of expansion terms
49    pub num_expansion_terms: usize,
50    /// Number of clusters
51    pub num_clusters: usize,
52    /// Cluster DOF mappings: for each cluster, the global DOF indices
53    pub cluster_dof_indices: Vec<Vec<usize>>,
54}
55
56/// Near-field block between two clusters
57#[derive(Debug, Clone)]
58pub struct NearFieldBlock {
59    /// Source cluster index
60    pub source_cluster: usize,
61    /// Field cluster index
62    pub field_cluster: usize,
63    /// Dense coefficient matrix for this block
64    pub coefficients: Array2<Complex64>,
65}
66
67/// D-matrix entry for a far cluster pair
68///
69/// The D-matrix for SLFMM is diagonal (same multipole index in source and field),
70/// so we store only the diagonal entries to save memory.
71/// This reduces storage from O(n²) to O(n) per cluster pair.
72#[derive(Debug, Clone)]
73pub struct DMatrixEntry {
74    /// Source cluster index
75    pub source_cluster: usize,
76    /// Field cluster index
77    pub field_cluster: usize,
78    /// Diagonal translation coefficients (length = num_sphere_points)
79    /// The full D-matrix would be: D[p,q] = diagonal[p] if p == q, else 0
80    pub diagonal: Array1<Complex64>,
81}
82
83impl SlfmmSystem {
84    /// Create a new empty SLFMM system
85    pub fn new(
86        num_dofs: usize,
87        num_clusters: usize,
88        num_sphere_points: usize,
89        num_expansion_terms: usize,
90    ) -> Self {
91        Self {
92            near_matrix: Vec::new(),
93            t_matrices: Vec::with_capacity(num_clusters),
94            t_vector: Array1::zeros(num_sphere_points * num_clusters),
95            d_matrices: Vec::new(),
96            s_matrices: Vec::with_capacity(num_clusters),
97            rhs: Array1::zeros(num_dofs),
98            num_dofs,
99            num_sphere_points,
100            num_expansion_terms,
101            num_clusters,
102            cluster_dof_indices: Vec::with_capacity(num_clusters),
103        }
104    }
105
106    /// Extract the near-field matrix as a dense matrix for preconditioning
107    ///
108    /// This assembles only the near-field blocks into a dense matrix,
109    /// which is much smaller than the full matrix (O(N) vs O(N²) entries).
110    pub fn extract_near_field_matrix(&self) -> Array2<Complex64> {
111        let mut matrix = Array2::zeros((self.num_dofs, self.num_dofs));
112
113        for block in &self.near_matrix {
114            let src_dofs = &self.cluster_dof_indices[block.source_cluster];
115            let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
116
117            // Copy block to global matrix
118            for (local_i, &global_i) in src_dofs.iter().enumerate() {
119                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
120                    matrix[[global_i, global_j]] += block.coefficients[[local_i, local_j]];
121                }
122            }
123
124            // Handle symmetric storage: also fill the (fld, src) part
125            if block.source_cluster != block.field_cluster {
126                for (local_i, &global_i) in src_dofs.iter().enumerate() {
127                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
128                        matrix[[global_j, global_i]] += block.coefficients[[local_i, local_j]];
129                    }
130                }
131            }
132        }
133
134        matrix
135    }
136
137    /// Apply the SLFMM operator: y = ([N] + [S][D][T]) * x
138    ///
139    /// This is used in iterative solvers (CGS, BiCGSTAB).
140    ///
141    /// The decomposition is:
142    /// - Near-field: Direct element-to-element interactions for nearby clusters
143    /// - Far-field: Multipole expansions with S*D*T factorization
144    ///
145    /// # Arguments
146    /// * `x` - Input vector (length = num_dofs)
147    ///
148    /// # Returns
149    /// * `y` - Output vector (length = num_dofs), y = A*x
150    pub fn matvec(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
151        // === Near-field contribution: y += [N] * x (parallelized) ===
152        // Compute all near-field block contributions in parallel, then sum them
153        let near_contributions: Vec<Vec<(usize, Complex64)>> =
154            parallel_flat_map(&self.near_matrix, |block| {
155                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
156                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
157
158                let mut contributions = Vec::new();
159
160                // Gather x values from field cluster
161                let x_local: Array1<Complex64> = Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
162
163                // Apply block matrix
164                let y_local = block.coefficients.dot(&x_local);
165
166                // Collect contributions for source DOFs
167                for (local_i, &global_i) in src_dofs.iter().enumerate() {
168                    contributions.push((global_i, y_local[local_i]));
169                }
170
171                // Handle symmetric storage
172                if block.source_cluster != block.field_cluster {
173                    let x_src: Array1<Complex64> =
174                        Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
175                    let y_fld = block.coefficients.t().dot(&x_src);
176                    for (local_j, &global_j) in fld_dofs.iter().enumerate() {
177                        contributions.push((global_j, y_fld[local_j]));
178                    }
179                }
180
181                vec![contributions]
182            });
183
184        // Accumulate near-field contributions
185        let mut y = Array1::zeros(self.num_dofs);
186        for contributions in near_contributions {
187            for (idx, val) in contributions {
188                y[idx] += val;
189            }
190        }
191
192        // === Far-field contribution: y += [S][D][T] * x (parallelized) ===
193
194        // Step 1: Compute multipole expansions for each cluster in parallel: t[c] = T[c] * x[c]
195        let multipoles: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
196            &self.t_matrices,
197            |cluster_idx, t_mat| {
198                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
199                if cluster_dofs.is_empty() || t_mat.is_empty() {
200                    Array1::zeros(self.num_sphere_points)
201                } else {
202                    let x_local: Array1<Complex64> =
203                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
204                    t_mat.dot(&x_local)
205                }
206            },
207        );
208
209        // Step 2: Translate multipoles between far clusters (parallelized)
210        // Compute all D*multipole products in parallel
211        // D-matrix is diagonal, so D*x is just element-wise multiplication
212        let d_contributions: Vec<(usize, Array1<Complex64>)> =
213            parallel_map(&self.d_matrices, |d_entry| {
214                let src_mult = &multipoles[d_entry.source_cluster];
215                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * src_mult[i]
216                let translated = &d_entry.diagonal * src_mult;
217                (d_entry.field_cluster, translated)
218            });
219
220        // Accumulate D-matrix contributions into locals (sequential to avoid race)
221        let mut locals: Vec<Array1<Complex64>> = (0..self.num_clusters)
222            .map(|_| Array1::zeros(self.num_sphere_points))
223            .collect();
224
225        for (field_cluster, translated) in d_contributions {
226            for i in 0..self.num_sphere_points {
227                locals[field_cluster][i] += translated[i];
228            }
229        }
230
231        // Step 3: Evaluate locals at field points in parallel: y[c] += S[c] * locals[c]
232        let far_contributions: Vec<Vec<(usize, Complex64)>> =
233            parallel_enumerate_filter_map(&self.s_matrices, |cluster_idx, s_mat| {
234                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
235                if cluster_dofs.is_empty() || s_mat.is_empty() {
236                    return None;
237                }
238                let y_local = s_mat.dot(&locals[cluster_idx]);
239                let contributions: Vec<(usize, Complex64)> = cluster_dofs
240                    .iter()
241                    .enumerate()
242                    .map(|(local_j, &global_j)| (global_j, y_local[local_j]))
243                    .collect();
244                Some(contributions)
245            });
246
247        // Accumulate far-field contributions
248        for contributions in far_contributions {
249            for (idx, val) in contributions {
250                y[idx] += val;
251            }
252        }
253
254        y
255    }
256
257    /// Apply the SLFMM operator transpose: y = A^T * x (parallelized)
258    ///
259    /// Used by some iterative solvers (e.g., BiCGSTAB).
260    pub fn matvec_transpose(&self, x: &Array1<Complex64>) -> Array1<Complex64> {
261        // === Near-field contribution (transposed, parallelized) ===
262        let near_contributions: Vec<Vec<(usize, Complex64)>> =
263            parallel_flat_map(&self.near_matrix, |block| {
264                let src_dofs = &self.cluster_dof_indices[block.source_cluster];
265                let fld_dofs = &self.cluster_dof_indices[block.field_cluster];
266
267                let mut contributions = Vec::new();
268
269                // For transpose: gather from source DOFs, scatter to field DOFs
270                let x_local: Array1<Complex64> = Array1::from_iter(src_dofs.iter().map(|&i| x[i]));
271                let y_local = block.coefficients.t().dot(&x_local);
272                for (local_j, &global_j) in fld_dofs.iter().enumerate() {
273                    contributions.push((global_j, y_local[local_j]));
274                }
275
276                // Symmetric storage
277                if block.source_cluster != block.field_cluster {
278                    let x_fld: Array1<Complex64> =
279                        Array1::from_iter(fld_dofs.iter().map(|&i| x[i]));
280                    let y_src = block.coefficients.dot(&x_fld);
281                    for (local_i, &global_i) in src_dofs.iter().enumerate() {
282                        contributions.push((global_i, y_src[local_i]));
283                    }
284                }
285
286                vec![contributions]
287            });
288
289        // Accumulate near-field contributions
290        let mut y = Array1::zeros(self.num_dofs);
291        for contributions in near_contributions {
292            for (idx, val) in contributions {
293                y[idx] += val;
294            }
295        }
296
297        // === Far-field contribution (transposed, parallelized): y += T^T * D^T * S^T * x ===
298
299        // Step 1: S^T * x (parallelized)
300        let locals: Vec<Array1<Complex64>> = crate::core::parallel::parallel_enumerate_map(
301            &self.s_matrices,
302            |cluster_idx, s_mat| {
303                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
304                if cluster_dofs.is_empty() || s_mat.is_empty() {
305                    Array1::zeros(self.num_sphere_points)
306                } else {
307                    let x_local: Array1<Complex64> =
308                        Array1::from_iter(cluster_dofs.iter().map(|&i| x[i]));
309                    s_mat.t().dot(&x_local)
310                }
311            },
312        );
313
314        // Step 2: D^T translation (parallelized)
315        // D-matrix is diagonal, so D^T = D (diagonal matrices are symmetric)
316        let d_contributions: Vec<(usize, Array1<Complex64>)> =
317            parallel_map(&self.d_matrices, |d_entry| {
318                let fld_local = &locals[d_entry.field_cluster];
319                // Diagonal matrix-vector multiply: translated[i] = diagonal[i] * fld_local[i]
320                let translated = &d_entry.diagonal * fld_local;
321                (d_entry.source_cluster, translated)
322            });
323
324        // Accumulate D-matrix contributions
325        let mut multipoles: Vec<Array1<Complex64>> = (0..self.num_clusters)
326            .map(|_| Array1::zeros(self.num_sphere_points))
327            .collect();
328
329        for (source_cluster, translated) in d_contributions {
330            for i in 0..self.num_sphere_points {
331                multipoles[source_cluster][i] += translated[i];
332            }
333        }
334
335        // Step 3: T^T * multipoles (parallelized)
336        let far_contributions: Vec<Vec<(usize, Complex64)>> =
337            parallel_enumerate_filter_map(&self.t_matrices, |cluster_idx, t_mat| {
338                let cluster_dofs = &self.cluster_dof_indices[cluster_idx];
339                if cluster_dofs.is_empty() || t_mat.is_empty() {
340                    return None;
341                }
342                let y_local = t_mat.t().dot(&multipoles[cluster_idx]);
343                let contributions: Vec<(usize, Complex64)> = cluster_dofs
344                    .iter()
345                    .enumerate()
346                    .map(|(local_i, &global_i)| (global_i, y_local[local_i]))
347                    .collect();
348                Some(contributions)
349            });
350
351        // Accumulate far-field contributions
352        for contributions in far_contributions {
353            for (idx, val) in contributions {
354                y[idx] += val;
355            }
356        }
357
358        y
359    }
360}
361
362/// Build the SLFMM system matrices
363///
364/// # Arguments
365/// * `elements` - Vector of mesh elements
366/// * `nodes` - Node coordinates (num_nodes × 3)
367/// * `clusters` - Vector of clusters
368/// * `physics` - Physics parameters
369/// * `n_theta` - Number of integration points in theta direction
370/// * `n_phi` - Number of integration points in phi direction
371/// * `n_terms` - Number of expansion terms
372pub fn build_slfmm_system(
373    elements: &[Element],
374    nodes: &Array2<f64>,
375    clusters: &[Cluster],
376    physics: &PhysicsParams,
377    n_theta: usize,
378    n_phi: usize,
379    n_terms: usize,
380) -> SlfmmSystem {
381    let num_dofs = count_dofs(elements);
382    let num_clusters = clusters.len();
383    let num_sphere_points = n_theta * n_phi;
384
385    let mut system = SlfmmSystem::new(num_dofs, num_clusters, num_sphere_points, n_terms);
386
387    // Build cluster DOF mappings: for each cluster, collect the global DOF indices
388    // of elements that belong to this cluster
389    build_cluster_dof_mappings(&mut system, elements, clusters);
390
391    // Compute unit sphere quadrature points
392    let (sphere_coords, sphere_weights) = unit_sphere_quadrature(n_theta, n_phi);
393
394    // Build near-field matrix
395    build_near_field(&mut system, elements, nodes, clusters, physics);
396
397    // Build T-matrices (element-to-cluster expansion)
398    build_t_matrices(
399        &mut system,
400        elements,
401        clusters,
402        physics,
403        &sphere_coords,
404        &sphere_weights,
405    );
406
407    // Build D-matrices (cluster-to-cluster translation)
408    build_d_matrices(&mut system, clusters, physics, &sphere_coords, n_terms);
409
410    // Build S-matrices (cluster-to-element evaluation)
411    build_s_matrices(
412        &mut system,
413        elements,
414        clusters,
415        physics,
416        &sphere_coords,
417        &sphere_weights,
418    );
419
420    system
421}
422
423/// Build the mapping from clusters to global DOF indices
424fn build_cluster_dof_mappings(
425    system: &mut SlfmmSystem,
426    elements: &[Element],
427    clusters: &[Cluster],
428) {
429    for cluster in clusters {
430        let mut dof_indices = Vec::new();
431        for &elem_idx in &cluster.element_indices {
432            let elem = &elements[elem_idx];
433            if elem.property.is_evaluation() {
434                continue;
435            }
436            // Collect all DOF addresses for this element
437            dof_indices.extend(elem.dof_addresses.iter().copied());
438        }
439        system.cluster_dof_indices.push(dof_indices);
440    }
441}
442
443/// Count total number of DOFs
444fn count_dofs(elements: &[Element]) -> usize {
445    elements
446        .iter()
447        .filter(|e| !e.property.is_evaluation())
448        .map(|e| e.dof_addresses.len())
449        .sum()
450}
451
452/// Build near-field matrix blocks (parallelized)
453fn build_near_field(
454    system: &mut SlfmmSystem,
455    elements: &[Element],
456    nodes: &Array2<f64>,
457    clusters: &[Cluster],
458    physics: &PhysicsParams,
459) {
460    let gamma = Complex64::new(physics.gamma(), 0.0);
461    let tau = Complex64::new(physics.tau, 0.0);
462    let beta = physics.burton_miller_beta();
463
464    // Collect all cluster pairs that need processing
465    let mut cluster_pairs: Vec<(usize, usize, bool)> = Vec::new();
466
467    for (i, cluster_i) in clusters.iter().enumerate() {
468        // Self-interaction
469        cluster_pairs.push((i, i, true));
470
471        // Interaction with near clusters (only upper triangle)
472        for &j in &cluster_i.near_clusters {
473            if j > i {
474                cluster_pairs.push((i, j, false));
475            }
476        }
477    }
478
479    // Compute all near-field blocks in parallel
480    let near_blocks: Vec<NearFieldBlock> = parallel_map(&cluster_pairs, |&(i, j, is_self)| {
481        let cluster_i = &clusters[i];
482        let cluster_j = &clusters[j];
483
484        let mut block = compute_near_block(
485            elements,
486            nodes,
487            &cluster_i.element_indices,
488            &cluster_j.element_indices,
489            physics,
490            gamma,
491            tau,
492            beta,
493            is_self,
494        );
495
496        // Add free terms to diagonal for self-interaction blocks
497        if is_self {
498            for local_idx in 0..cluster_i.element_indices.len() {
499                let elem_idx = cluster_i.element_indices[local_idx];
500                let elem = &elements[elem_idx];
501                if elem.property.is_evaluation() {
502                    continue;
503                }
504                match &elem.boundary_condition {
505                    BoundaryCondition::Velocity(_)
506                    | BoundaryCondition::VelocityWithAdmittance { .. } => {
507                        block[[local_idx, local_idx]] += gamma * 0.5;
508                    }
509                    BoundaryCondition::Pressure(_) => {
510                        block[[local_idx, local_idx]] += beta * tau * 0.5;
511                    }
512                    _ => {}
513                }
514            }
515        }
516
517        NearFieldBlock {
518            source_cluster: i,
519            field_cluster: j,
520            coefficients: block,
521        }
522    });
523
524    system.near_matrix = near_blocks;
525}
526
527/// Compute a near-field block between two sets of elements
528fn compute_near_block(
529    elements: &[Element],
530    nodes: &Array2<f64>,
531    source_indices: &[usize],
532    field_indices: &[usize],
533    physics: &PhysicsParams,
534    gamma: Complex64,
535    tau: Complex64,
536    beta: Complex64,
537    is_self: bool,
538) -> Array2<Complex64> {
539    let n_source = source_indices.len();
540    let n_field = field_indices.len();
541    let mut block = Array2::zeros((n_source, n_field));
542
543    for (i, &src_idx) in source_indices.iter().enumerate() {
544        let source_elem = &elements[src_idx];
545        if source_elem.property.is_evaluation() {
546            continue;
547        }
548
549        for (j, &fld_idx) in field_indices.iter().enumerate() {
550            let field_elem = &elements[fld_idx];
551            if field_elem.property.is_evaluation() {
552                continue;
553            }
554
555            let element_coords = get_element_coords(field_elem, nodes);
556
557            let result = if is_self && src_idx == fld_idx {
558                // Singular integration
559                singular_integration(
560                    &source_elem.center,
561                    &source_elem.normal,
562                    &element_coords,
563                    field_elem.element_type,
564                    physics,
565                    None,
566                    0,
567                    false,
568                )
569            } else {
570                // Regular integration
571                regular_integration(
572                    &source_elem.center,
573                    &source_elem.normal,
574                    &element_coords,
575                    field_elem.element_type,
576                    field_elem.area,
577                    physics,
578                    None,
579                    0,
580                    false,
581                )
582            };
583
584            // Assemble using Burton-Miller formulation
585            let coeff = result.dg_dn_integral * gamma * tau + result.d2g_dnxdny_integral * beta;
586            block[[i, j]] = coeff;
587        }
588    }
589
590    block
591}
592
593/// Build T-matrices (element to cluster multipole expansion) - parallelized
594fn build_t_matrices(
595    system: &mut SlfmmSystem,
596    elements: &[Element],
597    clusters: &[Cluster],
598    physics: &PhysicsParams,
599    sphere_coords: &[[f64; 3]],
600    sphere_weights: &[f64],
601) {
602    let k = physics.wave_number;
603    let num_sphere_points = sphere_coords.len();
604
605    // Build all T-matrices in parallel
606    let t_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
607        let num_elem = cluster.element_indices.len();
608        let mut t_matrix = Array2::zeros((num_sphere_points, num_elem));
609
610        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
611            let elem = &elements[elem_idx];
612            if elem.property.is_evaluation() {
613                continue;
614            }
615
616            // Precompute difference vector
617            let diff: [f64; 3] = [
618                elem.center[0] - cluster.center[0],
619                elem.center[1] - cluster.center[1],
620                elem.center[2] - cluster.center[2],
621            ];
622
623            // For each integration direction on the unit sphere
624            for (p, coord) in sphere_coords.iter().enumerate() {
625                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
626                let exp_factor = Complex64::new((k * s_dot_diff).cos(), -(k * s_dot_diff).sin());
627                t_matrix[[p, j]] = exp_factor * sphere_weights[p];
628            }
629        }
630
631        t_matrix
632    });
633
634    system.t_matrices = t_matrices;
635}
636
637/// Build D-matrices (cluster to cluster translation) - parallelized
638///
639/// The D-matrix is diagonal in the single-level FMM, so we only store
640/// the diagonal entries. This reduces memory from O(P²) to O(P) per pair
641/// where P is the number of sphere integration points.
642fn build_d_matrices(
643    system: &mut SlfmmSystem,
644    clusters: &[Cluster],
645    physics: &PhysicsParams,
646    _sphere_coords: &[[f64; 3]],
647    n_terms: usize,
648) {
649    let k = physics.wave_number;
650    let num_sphere_points = system.num_sphere_points;
651
652    // Collect all far cluster pairs
653    let mut far_pairs: Vec<(usize, usize)> = Vec::new();
654    for (i, cluster_i) in clusters.iter().enumerate() {
655        for &j in &cluster_i.far_clusters {
656            far_pairs.push((i, j));
657        }
658    }
659
660    // Log memory estimate
661    let num_pairs = far_pairs.len();
662    let mem_per_pair = num_sphere_points * std::mem::size_of::<Complex64>();
663    let total_mem_mb = (num_pairs * mem_per_pair) as f64 / (1024.0 * 1024.0);
664    if total_mem_mb > 100.0 {
665        eprintln!(
666            "    D-matrices: {} far pairs × {} points = {:.1} MB",
667            num_pairs, num_sphere_points, total_mem_mb
668        );
669    }
670
671    // Compute all D-matrices in parallel (diagonal storage only)
672    let d_matrices: Vec<DMatrixEntry> = parallel_map(&far_pairs, |&(i, j)| {
673        let cluster_i = &clusters[i];
674        let cluster_j = &clusters[j];
675
676        // Distance vector between cluster centers
677        let diff = [
678            cluster_i.center[0] - cluster_j.center[0],
679            cluster_i.center[1] - cluster_j.center[1],
680            cluster_i.center[2] - cluster_j.center[2],
681        ];
682        let r = (diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2]).sqrt();
683        let kr = k * r;
684
685        // Compute translation using spherical Hankel functions
686        let h_funcs = spherical_hankel_first_kind(n_terms.max(2), kr, 1.0);
687
688        // D-matrix is diagonal: D[p,p] = h_0(kr) * ik
689        // Store only the diagonal (all entries are the same in this simplified model)
690        let d_value = h_funcs[0] * Complex64::new(0.0, k);
691        let diagonal = Array1::from_elem(num_sphere_points, d_value);
692
693        DMatrixEntry {
694            source_cluster: i,
695            field_cluster: j,
696            diagonal,
697        }
698    });
699
700    system.d_matrices = d_matrices;
701}
702
703/// Build S-matrices (cluster multipole to element evaluation) - parallelized
704fn build_s_matrices(
705    system: &mut SlfmmSystem,
706    elements: &[Element],
707    clusters: &[Cluster],
708    physics: &PhysicsParams,
709    sphere_coords: &[[f64; 3]],
710    sphere_weights: &[f64],
711) {
712    let k = physics.wave_number;
713    let num_sphere_points = sphere_coords.len();
714
715    // Build all S-matrices in parallel
716    let s_matrices: Vec<Array2<Complex64>> = parallel_map(clusters, |cluster| {
717        let num_elem = cluster.element_indices.len();
718        let mut s_matrix = Array2::zeros((num_elem, num_sphere_points));
719
720        for (j, &elem_idx) in cluster.element_indices.iter().enumerate() {
721            let elem = &elements[elem_idx];
722            if elem.property.is_evaluation() {
723                continue;
724            }
725
726            // Precompute difference vector
727            let diff: [f64; 3] = [
728                elem.center[0] - cluster.center[0],
729                elem.center[1] - cluster.center[1],
730                elem.center[2] - cluster.center[2],
731            ];
732
733            // For each integration direction on the unit sphere
734            for (p, coord) in sphere_coords.iter().enumerate() {
735                let s_dot_diff = coord[0] * diff[0] + coord[1] * diff[1] + coord[2] * diff[2];
736                let exp_factor = Complex64::new((k * s_dot_diff).cos(), (k * s_dot_diff).sin());
737                s_matrix[[j, p]] = exp_factor * sphere_weights[p];
738            }
739        }
740
741        s_matrix
742    });
743
744    system.s_matrices = s_matrices;
745}
746
747/// Get element node coordinates as Array2
748fn get_element_coords(element: &Element, nodes: &Array2<f64>) -> Array2<f64> {
749    let num_nodes = element.connectivity.len();
750    let mut coords = Array2::zeros((num_nodes, 3));
751
752    for (i, &node_idx) in element.connectivity.iter().enumerate() {
753        for j in 0..3 {
754            coords[[i, j]] = nodes[[node_idx, j]];
755        }
756    }
757
758    coords
759}
760
761#[cfg(test)]
762mod tests {
763    use super::*;
764    use crate::core::types::{BoundaryCondition, ElementProperty, ElementType};
765    use ndarray::array;
766
767    fn make_test_cluster() -> Cluster {
768        let mut cluster = Cluster::new(array![0.5, 0.5, 0.0]);
769        cluster.element_indices = vec![0, 1];
770        cluster.near_clusters = vec![];
771        cluster.far_clusters = vec![];
772        cluster
773    }
774
775    fn make_test_elements() -> (Vec<Element>, Array2<f64>) {
776        let nodes = Array2::from_shape_vec(
777            (4, 3),
778            vec![0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.5, 1.0, 0.0],
779        )
780        .unwrap();
781
782        let elem0 = Element {
783            connectivity: vec![0, 1, 2],
784            element_type: ElementType::Tri3,
785            property: ElementProperty::Surface,
786            normal: array![0.0, 0.0, 1.0],
787            node_normals: Array2::zeros((3, 3)),
788            center: array![0.5, 1.0 / 3.0, 0.0],
789            area: 0.5,
790            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(1.0, 0.0)]),
791            group: 0,
792            dof_addresses: vec![0],
793        };
794
795        let elem1 = Element {
796            connectivity: vec![1, 3, 2],
797            element_type: ElementType::Tri3,
798            property: ElementProperty::Surface,
799            normal: array![0.0, 0.0, 1.0],
800            node_normals: Array2::zeros((3, 3)),
801            center: array![1.0, 2.0 / 3.0, 0.0],
802            area: 0.5,
803            boundary_condition: BoundaryCondition::Velocity(vec![Complex64::new(0.0, 0.0)]),
804            group: 0,
805            dof_addresses: vec![1],
806        };
807
808        (vec![elem0, elem1], nodes)
809    }
810
811    #[test]
812    fn test_slfmm_system_creation() {
813        let system = SlfmmSystem::new(10, 2, 32, 5);
814        assert_eq!(system.num_dofs, 10);
815        assert_eq!(system.num_sphere_points, 32);
816        assert_eq!(system.num_expansion_terms, 5);
817    }
818
819    #[test]
820    fn test_build_slfmm_system() {
821        let (elements, nodes) = make_test_elements();
822        let cluster = make_test_cluster();
823        let clusters = vec![cluster];
824        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
825
826        let system = build_slfmm_system(&elements, &nodes, &clusters, &physics, 4, 8, 5);
827
828        assert_eq!(system.num_dofs, 2);
829        assert_eq!(system.t_matrices.len(), 1);
830        assert_eq!(system.s_matrices.len(), 1);
831    }
832
833    #[test]
834    fn test_near_field_block() {
835        let (elements, nodes) = make_test_elements();
836        let physics = PhysicsParams::new(100.0, 343.0, 1.21, false);
837        let gamma = Complex64::new(physics.gamma(), 0.0);
838        let tau = Complex64::new(physics.tau, 0.0);
839        let beta = physics.burton_miller_beta();
840
841        let block = compute_near_block(
842            &elements,
843            &nodes,
844            &[0, 1],
845            &[0, 1],
846            &physics,
847            gamma,
848            tau,
849            beta,
850            true,
851        );
852
853        assert_eq!(block.shape(), &[2, 2]);
854        // Diagonal entries should be non-zero
855        assert!(block[[0, 0]].norm() > 0.0);
856        assert!(block[[1, 1]].norm() > 0.0);
857    }
858}