ferreus_bbfmm 0.1.0

Parallel black box fast multipole method (BBFMM) library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
/////////////////////////////////////////////////////////////////////////////////////////////
//
// Builds Chebyshev interpolation operators and compressed M2L transfer operators for BBFMM.
//
// Created on: 15 Nov 2025     Author: Daniel Owen 
//
// Copyright (c) 2025, Maptek Pty Ltd. All rights reserved. Licensed under the MIT License. 
//
/////////////////////////////////////////////////////////////////////////////////////////////

use crate::{
    aca,
    bbfmm::{M2LCompressionType, PrecomputeOperators},
    KernelFunction,
    utils,
};
use faer::{Mat, RowRef, row::generic::Row};
use itertools::{Itertools, iproduct};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use std::collections::HashMap;

// # References
// [1] W. Fong, E. Darve, The black-box fast multipole method, Journal of Computational Physics 228 (23) (2009) 8712-8725.
// [2] M. Messner, B. Bramas, O. Coulaud, E. Darve, Optimized M2L Kernels for the Chebyshev Interpolation based Fast Multipole Method (2012).

/// Generates Chebyshev nodes between -1 and 1 of T_n(x) for the given interpolation order.
fn generate_chebyshev_nodes(interpolation_order: &usize) -> Vec<f64> {
    (0..*interpolation_order)
        .rev()
        .map(|i| {
            let theta = std::f64::consts::PI * (i as f64 + 0.5) / *interpolation_order as f64;
            theta.cos()
        })
        .collect()
}

/// Calculates Tn(x), the chebyshev polynomials of the first kind,
/// for k between 0 and interpolation order -1 inclusive.
///
/// Uses the recurrence relation:
///  T_0(x) = 1 \n
///  T_1(x) = x \n
///  T_{n+1}(x) = 2xT_n(x) - T_{n-1}(x) for n > 1
fn evaluate_chebyshev_polynomials(
    num_columns: &usize,
    num_rows: &usize,
    nodes: &Vec<f64>,
) -> Mat<f64> {
    let mut tn_x = Mat::<f64>::zeros(*num_rows as usize, *num_columns as usize);

    for i in 0..*num_rows {
        for j in 0..*num_columns {
            let value = if j == 0 {
                1.0
            } else if j == 1 {
                nodes[i]
            } else {
                2.0 * nodes[i] * tn_x.get(i, j - 1) - tn_x.get(i, j - 2)
            };
            tn_x[(i, j)] = value;
        }
    }

    tn_x
}

/// Applies the linear transformation to normalize Chebyshev evaluations into interpolation basis.
/// Returns S(x) matrix used for scaling/interpolating.
fn calculate_sn(
    tn_x: Mat<f64>,
    polynomial_nodes: &Mat<f64>,
    interpolation_order: &usize,
) -> Mat<f64> {
    let mut sn = tn_x * &polynomial_nodes.transpose();

    sn.col_iter_mut().for_each(|col| {
        col.iter_mut().for_each(|element| {
            *element = (*element * 2.0 - 1.0) / *interpolation_order as f64;
        });
    });
    sn
}

/// Calculates the Chebyshev weights to transfer from the Chebyshev nodes of a
/// parent cell to the Chebyshev nodes of its children.
fn get_cheb_transfer_from_parent_to_children(
    interpolation_order: &usize,
    cheb_nodes: &Vec<f64>,
    polynomial_nodes: &Mat<f64>,
) -> Mat<f64> {
    let num_child_cheb_nodes = 2 * *interpolation_order as usize;

    // Initialise the one dimensional transfer array.
    // So, using an interpolation order of 3 as an example in one dimension, we go from
    // parent_cheb_nodes = [0.866, 6.12E-17, -0.866] to
    // child_cheb_nodes = [-0.067, -0.5, -0.933, 0.933, 0.5, 0.067]
    let child_cheb_nodes = (0..num_child_cheb_nodes)
        .map(|i| {
            let child = if i < *interpolation_order {
                // Calculate the cheb nodes for the first child (left side) in one dimension.
                cheb_nodes[i] - 1.0
            } else {
                // Calculate the cheb nodes for the second child (right side) in one dimension.
                cheb_nodes[i - *interpolation_order] + 1.0
            };
            child * 0.5
        })
        .collect();

    // Evaluate the Chebyshev polynomials at the child Chebyshev nodes.
    let tn_x = evaluate_chebyshev_polynomials(
        &interpolation_order,
        &num_child_cheb_nodes,
        &child_cheb_nodes,
    );

    // Calculate the sum of the Chebyshev polynomials.
    calculate_sn(tn_x, &polynomial_nodes, &interpolation_order)
}

/// Returns the relative Morton offsets in an N-dimensional hypercube.
pub fn calculate_relative_offsets_morton(dim: &usize) -> Mat<usize> {
    let num_children = 2usize.pow(*dim as u32);

    let output = Mat::<usize>::from_fn(num_children, *dim, |i, j| match (i & (1 << j)) != 0 {
        true => 1,
        false => 0,
    });

    output
}

/// Calculates the Chebyshev weights to transfer from the Chebyshev nodes of the
/// children of a parent cell to the Chebyshev nodes of the parent cell.
fn get_m2m_transfer_matrices(
    interpolation_order: &usize,
    cheb_nodes: &Vec<f64>,
    polynomial_nodes: &Mat<f64>,
    dimensions: &usize,
    num_child_cells: &usize,
) -> Vec<Mat<f64>> {
    // Get the transfer weights to go from cheb nodes of parent to cheb nodes of children.
    let sn = get_cheb_transfer_from_parent_to_children(
        &interpolation_order,
        &cheb_nodes,
        &polynomial_nodes,
    );

    // Split the transfer weights array in half row wise.
    let child_transfers = sn.split_at_row(*interpolation_order);

    // Get an array of the unique transfer vectors for well separated cells based on Morton order.
    let m2m_transfer_vector_array: Mat<usize> = calculate_relative_offsets_morton(&dimensions);

    // Loop for each child cell
    (0..*num_child_cells)
        .map(|i| {
            // Tensor product of transfers in each dimension.
            let l2l_transfer = m2m_transfer_vector_array
                .row(i)
                .iter()
                .map(|&j| {
                    if j == 0 {
                        &child_transfers.0
                    } else {
                        &child_transfers.1
                    }
                })
                .fold(None, |acc: Option<Mat<f64>>, mat| {
                    Some(match acc {
                        Some(prev) => prev.kron(mat.as_ref()),
                        None => mat.to_owned(),
                    })
                })
                .unwrap();

            l2l_transfer.transpose().to_owned()
        })
        .collect()
}

/// Generates all possible M2L vectors in [-3,3]^d and a set of unique reference vectors from which
/// all other interactions can be realised by way of symmetry permutations.
/// Reference vectors are defined in [2] for the 3D case, as:
///  t1, t2, t3 >= 0 &
///  t1 >= t2 >= t3 >= 0
///
///  For 2D and 3D this equates to:
///      2D                        3D
///    [2, 0]                  [2, 0, 0]
///    [2, 1]                  [2, 1, 0]
///    [2, 2]                  [2, 2, 0]
///    [3, 0]                  [3, 0, 0]
///    [3, 1]                  [3, 1, 0]
///    [3, 2]                  [3, 2, 0]
///    [3, 3]                  [3, 3, 0]
///                            [2, 1, 1]
///                            [2, 2, 1]
///                            [3, 1, 1]
///                            [3, 2, 1]
///                            [3, 3, 1]
///                            [2, 2, 2]
///                            [3, 2, 2]
///                            [3, 3, 2]
///                            [3, 3, 3]
fn get_m2l_vectors(dimensions: &usize) -> (Mat<i32>, Mat<i32>) {
    let vector_range: Vec<i32> = (-3..4).into_iter().collect();
    let all_m2l_vectors = utils::cartesian_product::<i32>(&vector_range, *dimensions);
    let mut reference_m2l_vectors: Vec<i32> = Vec::new();

    let base_vector_range: Vec<i32> = (0..4).into_iter().collect();
    let base_vectors = utils::cartesian_product::<i32>(&base_vector_range, *dimensions);

    base_vectors.row_iter().for_each(|row| {
        if row[0] >= 2 {
            let mut valid = true;
            for i in 1..*dimensions {
                if row[i] > row[i - 1] {
                    valid = false;
                    break;
                }
            }
            if valid {
                reference_m2l_vectors.extend(row.iter());
            }
        }
    });

    let num_rows = reference_m2l_vectors.len() / dimensions;

    let reference_m2l_vectors = Mat::from_fn(num_rows, *dimensions, |i, j| {
        reference_m2l_vectors[i * *dimensions + j]
    });

    (all_m2l_vectors, reference_m2l_vectors)
}

/// Maps from multi-index to rows and columns of K matrix, as per section 3.1 of [2].
fn map_multi_index_to_k(alpha: &Vec<usize>, interpolation_order: &usize) -> usize {
    let alpha_length = alpha.len();
    let m_alpha: usize;

    if alpha_length == 1 {
        m_alpha = alpha[0] - 1;
    } else if alpha_length == 2 {
        m_alpha = (alpha[0] - 1) * interpolation_order + (alpha[1] - 1);
    } else {
        m_alpha = (alpha[0] - 1) * interpolation_order.pow(2)
            + (alpha[1] - 1) * interpolation_order
            + (alpha[2] - 1);
    }

    m_alpha
}

/// Permutes the multi-index for the axial symmetry case, as per equation 21 in [2].
fn permute_multi_index_axial(
    alpha: &RowRef<usize>,
    transfer_vector: RowRef<i32>,
    interpolation_order: &usize,
) -> Vec<usize> {
    let mut pi_a_t: Vec<usize> = Vec::new();

    alpha.iter().enumerate().for_each(|(idx, a_i)| {
        let value: usize;
        if transfer_vector[idx] < 0 {
            value = interpolation_order - (a_i - 1);
        } else {
            value = *a_i;
        }
        pi_a_t.push(value);
    });

    pi_a_t
}

/// Permutes the multi-index for the diagonal symmetry case, as per equation 22 in [2].
fn permute_multi_index_diagonal(alpha: &RowRef<usize>, sort_indices: &[usize]) -> Vec<usize> {
    // Reorder alpha based on the sorted indices
    sort_indices.iter().map(|&i| alpha[i]).collect()
}

/// Computes permutation indices that map a permuted multi-index to row and column indices in the K matrix
/// as per equations 26 and 27 from [2].
fn calculate_diagonal_axial_permutation_indices(
    transfer_vector: Option<RowRef<i32>>,
    sort_indices: Option<&[usize]>,
    interpolation_order: &usize,
    multi_indices: &Mat<usize>,
    diagonal: bool,
) -> Vec<usize> {
    let size = multi_indices.shape().0;
    let mut permutation_indices: Vec<usize> = vec![0; size];
    let mut alpha: RowRef<usize>;
    let mut alpha_prime: Vec<usize>;
    let mut i: usize;

    for j in 0..size {
        alpha = multi_indices.row(j);

        if diagonal {
            alpha_prime = permute_multi_index_diagonal(&alpha, &sort_indices.unwrap());
        } else {
            alpha_prime =
                permute_multi_index_axial(&alpha, transfer_vector.unwrap(), &interpolation_order)
        }

        i = map_multi_index_to_k(&alpha_prime, &interpolation_order);

        permutation_indices[i] = j;
    }

    permutation_indices
}

/// Finds matching rows in `all_m2l_vectors` that correspond to each
/// axis flip permutations.
fn lookup_axial_permutation_cases(
    axis_sign_permutations: &Mat<i32>,
    all_m2l_vectors: &Mat<i32>,
) -> Vec<usize> {
    let mut matching_indices: Vec<usize> = Vec::new();

    for vector in all_m2l_vectors.row_iter() {
        let flip_axes = Row::from_iter(vector.iter().map(|axis| if *axis < 0 { -1 } else { 1 }));

        for (idx, permutation) in axis_sign_permutations.row_iter().enumerate() {
            if flip_axes == permutation {
                matching_indices.push(idx);
                break;
            }
        }
    }

    matching_indices
}

/// Finds matching rows in `all_m2l_vectors` that correspond to each
/// diagonal flip permutations.
fn lookup_diagonal_permutation_cases(
    axis_order_permutations: &Vec<Vec<usize>>,
    all_m2l_vectors: &Mat<i32>,
) -> Vec<usize> {
    let m2l_vector_sorted_axes: Vec<Vec<usize>> = all_m2l_vectors
        .row_iter()
        .map(|row| {
            let abs_row_vec: Vec<i32> = row.iter().map(|axis| axis.abs() * -1).collect();
            utils::argsort(&abs_row_vec)
        })
        .collect();

    let mut matching_indices: Vec<usize> = Vec::new();

    for sort_axes in m2l_vector_sorted_axes.iter() {
        for (idx, permutation) in axis_order_permutations.iter().enumerate() {
            if sort_axes == permutation {
                matching_indices.push(idx);
                break;
            }
        }
    }

    matching_indices
}

/// Combines axial + diagonal permutation cases to index into precomputed reference operators.
fn lookup_combined_permutation_cases(
    combined_permutation_cases: &Vec<Vec<usize>>,
    permutation_combinations: &Vec<Vec<usize>>,
) -> Vec<usize> {
    let mut matching_indices: Vec<usize> = Vec::new();

    for case in combined_permutation_cases.iter() {
        for (idx, combination) in permutation_combinations.iter().enumerate() {
            if combination == case {
                matching_indices.push(idx);
                break;
            }
        }
    }

    matching_indices
}

/// Finds the indices of the relevant reference M2L vector for all possible M2L vectors
/// as per equations 26 and 27 in [2].
fn lookup_reference_vectors(
    all_m2l_vectors: &Mat<i32>,
    reference_m2l_vectors: &Mat<i32>,
) -> Vec<usize> {
    let mut matching_indices: Vec<usize> = vec![0; all_m2l_vectors.shape().0];

    // Sort the absolute values of each row in both arrays
    let mut sorted_reference_vectors = reference_m2l_vectors.clone();

    sorted_reference_vectors.row_iter_mut().for_each(|row| {
        let mut sorted_row_vec = Vec::from_iter(row.cloned().iter().cloned());
        sorted_row_vec.sort();
        row.iter_mut()
            .enumerate()
            .for_each(|(idx, val)| *val = sorted_row_vec[idx]);
    });

    for (global_idx, vector) in all_m2l_vectors.row_iter().enumerate() {
        let transfer_vec = {
            let mut sorted_vec: Vec<i32> = vector.iter().map(|i| i.abs()).collect();
            sorted_vec.sort();

            Row::from_iter(sorted_vec)
        };

        // Find the index of the first matching row in the reference array for each row in the transfer array
        for (idx, reference) in sorted_reference_vectors.row_iter().enumerate() {
            if transfer_vec == reference {
                matching_indices[global_idx] = idx;
                break;
            }
        }
    }

    matching_indices
}

/// Precomputes the full set of permutation lookup tables to use in exploiting symmetries in M2L operators.
fn get_permutation_lookups(
    dimensions: &usize,
    interpolation_order: &usize,
    all_m2l_vectors: &Mat<i32>,
    reference_m2l_vectors: &Mat<i32>,
) -> (Vec<Vec<usize>>, Vec<Vec<usize>>, Vec<usize>, Vec<usize>) {
    let dimension_range = 0..*dimensions;
    let len_dim_range = dimension_range.len();

    let axis_order_permutations: Vec<Vec<usize>> = dimension_range
        .into_iter()
        .permutations(len_dim_range)
        .unique()
        .collect();

    let axis_sign_permutations =
        utils::cartesian_product::<i32>(&vec![-1, 1], *dimensions);

    let num_axis_order_permutations: usize = axis_order_permutations.len();
    let num_axis_sign_permutations = axis_sign_permutations.shape().0;

    let multi_indices_range: Vec<usize> = (1..interpolation_order + 1).collect();

    let multi_indices =
        utils::cartesian_product::<usize>(&multi_indices_range, *dimensions);

    let unique_diagonal_permutation_indices: Vec<Vec<usize>> = axis_order_permutations
        .iter()
        .map(|i| {
            calculate_diagonal_axial_permutation_indices(
                None,
                Some(i),
                &interpolation_order,
                &multi_indices,
                true,
            )
        })
        .collect();

    let unique_axial_permutation_indices: Vec<Vec<usize>> = axis_sign_permutations
        .row_iter()
        .map(|i| {
            calculate_diagonal_axial_permutation_indices(
                Some(i),
                None,
                &interpolation_order,
                &multi_indices,
                false,
            )
        })
        .collect();

    let permutation_combinations: Vec<Vec<usize>> = iproduct!(
        0..num_axis_sign_permutations,
        0..num_axis_order_permutations
    )
    .into_iter()
    .map(|(a, b)| vec![a, b])
    .collect();

    let combined_permutation_indices: Vec<Vec<usize>> = permutation_combinations
        .iter()
        .map(|permutation| {
            let axial_permutation = &unique_axial_permutation_indices[permutation[0]];
            let diagonal_permutation = &unique_diagonal_permutation_indices[permutation[1]];
            let combo: Vec<usize> = diagonal_permutation
                .iter()
                .map(|i| axial_permutation[*i])
                .collect();
            combo
        })
        .collect();

    let inverse_permutations: Vec<Vec<usize>> = combined_permutation_indices
        .iter()
        .map(|permutation| utils::argsort(&permutation))
        .collect();

    let axial_permutation_cases =
        lookup_axial_permutation_cases(&axis_sign_permutations, all_m2l_vectors);

    let diagonal_permutation_cases =
        lookup_diagonal_permutation_cases(&axis_order_permutations, all_m2l_vectors);

    let combined_permutation_cases: Vec<Vec<usize>> = axial_permutation_cases
        .iter()
        .zip(diagonal_permutation_cases.iter())
        .map(|(a, b)| vec![*a, *b])
        .collect();

    let permutation_lookups =
        lookup_combined_permutation_cases(&combined_permutation_cases, &permutation_combinations);

    let reference_vector_lookups = lookup_reference_vectors(all_m2l_vectors, reference_m2l_vectors);

    (
        combined_permutation_indices,
        inverse_permutations,
        permutation_lookups,
        reference_vector_lookups,
    )
}

/// Generates Chebyshev target points in a unit hypercube based on the interpolation order.
fn generate_chebyshev_target_points(
    nodes: &Vec<f64>,
    dimensions: &usize,
    length: &f64,
) -> Mat<f64> {
    let mut chebyshev_target_points =
        utils::cartesian_product::<f64>(&nodes, *dimensions);

    chebyshev_target_points.row_iter_mut().for_each(|row| {
        row.iter_mut().for_each(|element| *element *= 0.5 * length);
    });

    chebyshev_target_points
}

/// Generates Chebyshev source points for the observation cell in the unit
/// hypercube based on the interpolation order.
fn generate_chebyshev_source_points(
    nodes: &Vec<f64>,
    reference_m2l_vector: RowRef<i32>,
    interpolation_order: &usize,
    dimensions: &usize,
    length: &f64,
) -> Mat<f64> {
    let locations_vec: Vec<f64> = (0..*interpolation_order)
        .into_iter()
        .map(|i| i as f64)
        .collect();

    let mut chebyshev_source_points =
        utils::cartesian_product::<f64>(&locations_vec, *dimensions);

    chebyshev_source_points.row_iter_mut().for_each(|row| {
        row.iter_mut().enumerate().for_each(|(idx, item)| {
            let initial_val = *item;
            *item =
                (reference_m2l_vector[idx] as f64 + (nodes[initial_val as usize] * 0.5)) * length;
        });
    });

    chebyshev_source_points
}

/// Precomputes all necessary interpolation and translation operators for the
/// Chebyshev-based Black-Box Fast Multipole Method (BBFMM).
///
/// This function is intended to be called once during initialization and reused
/// throughout the FMM evaluation. It constructs operators used in both the
/// upward (M2M) and downward (M2L, L2L) passes of the algorithm, as well as
/// precomputing permutation and reference data to enable symmetry exploitation
/// in the far-field interactions.
///
/// # Arguments
///
/// * `interpolation_order` - The number of Chebyshev nodes per axis (usually denoted `p`).
/// * `dimensions` - Number of spatial dimensions (e.g., 1, 2, or 3).
/// * `radius` - Half-width of the root node bounding box (defines cell scale).
/// * `depth` - Maximum depth of the FMM octree.
/// * `kernel_function` - Function handle for evaluating the kernel.
/// * `use_svd_compression` - Whether to compress M2L operators using ACA and SVD recompression.
///
/// # Returns
///
/// PrecomputeOperators - Struct containing precomputed approximation operators.
pub fn precompute_approximation_operators<K: KernelFunction + Send + Sync>(
    interpolation_order: &usize,
    dimensions: &usize,
    radius: &f64,
    depth: &u64,
    kernel: &K,
    compression_type: &M2LCompressionType,
    epsilon: &f64,
) -> PrecomputeOperators {
    let num_child_cells = (2 as usize).pow(*dimensions as u32);
    let num_nodes_nd = interpolation_order.pow(*dimensions as u32);

    // Generate the one dimensional Chebyshev nodes.
    let nodes = generate_chebyshev_nodes(&interpolation_order);

    // Generate the Chebyshev nodes for the given dimensions.
    let nodes_nd = utils::cartesian_product::<f64>(&nodes, *dimensions);

    // Evaluate the Chebyshev polynomials of the first kind at the nodes.
    let polynomial_nodes =
        evaluate_chebyshev_polynomials(&interpolation_order, &interpolation_order, &nodes);

    // Get the matrices of multipole to multipole transfer coefficients (Chebyshev weights).
    let m2m_transfer_matrices = get_m2m_transfer_matrices(
        &interpolation_order,
        &nodes,
        &polynomial_nodes,
        &dimensions,
        &num_child_cells,
    );

    // Get an array of all possible M2L transfer vectors and
    // the M2L vectors for the reference domain.
    let (all_m2l_vectors, reference_m2l_vectors) = get_m2l_vectors(&dimensions);

    // Get all the permutation lookup arrays.
    let (permutation_indices, inverse_permutations, permutation_lookups, reference_vector_lookups) =
        get_permutation_lookups(
            &dimensions,
            &interpolation_order,
            &all_m2l_vectors,
            &reference_m2l_vectors,
        );

    let num_reference_cells = reference_m2l_vectors.shape().0;

    // Parallel loop to calculate the compressed M2L operators for each level in parallel.
    let level_operators: Vec<(usize, HashMap<usize, Mat<f64>>, HashMap<usize, Mat<f64>>)> = (2
        ..*depth as usize + 1)
        .into_par_iter()
        .map(|level| {
            // Length of the cell at the current level.
            let cell_length_level = *radius / (2usize.pow(level as u32 - 1)) as f64;

            // Generate Chebyshev target points.
            let target_points =
                generate_chebyshev_target_points(&nodes, &dimensions, &cell_length_level);

            // Maps for the reference M2L operators for the current level.
            let mut u_level: HashMap<usize, Mat<f64>> = HashMap::new();
            let mut vt_level: HashMap<usize, Mat<f64>> = HashMap::new();

            for i in 0..num_reference_cells {
                let source_points = generate_chebyshev_source_points(
                    &nodes,
                    reference_m2l_vectors.row(i),
                    &interpolation_order,
                    &dimensions,
                    &cell_length_level,
                );

                match compression_type {
                    M2LCompressionType::ACA => {
                        // Use Adaptive Cross Approximation (ACA) with partial pivoting
                        // followed by an SVD recompression to compress the M2L operators.
                        let num_rows = target_points.shape().0;
                        let num_columns = source_points.shape().0;

                        // Given the way the M2L operators are used in the FMM we need to parse the source and target
                        // points to the generator in the opposite order to what you'd expect.
                        // i.e. The target_points input for the function needs to be the Chebyshev source_points and
                        // the source_points input for the function needs to be the Chebyshev target_points.
                        let matrix_subset_generator =
                            |rows_start: &usize,

                             rows_end: &usize,
                             columns_start: &usize,
                             columns_end: &usize| {
                                utils::get_a_matrix_subset(
                                    &source_points,
                                    &target_points,
                                    kernel,
                                    rows_start,
                                    rows_end,
                                    columns_start,
                                    columns_end,
                                )
                            };

                        let (u, v) = aca::aca_partial_pivoting(
                            num_rows,
                            num_columns,
                            matrix_subset_generator,
                            &epsilon,
                        );

                        let (truncated_u, truncated_vt) = aca::recompress_aca(&u, &v, &epsilon);

                        u_level.insert(i, truncated_u);
                        vt_level.insert(i, truncated_vt);
                    }
                    M2LCompressionType::SVD => {
                        let a_matrix = utils::get_a_matrix(
                            &source_points,
                            &target_points,
                            kernel,
                        );

                        let svd = a_matrix.svd().unwrap();
                        let ur = svd.U(); // Left singular vectors (k × k)
                        let sr = svd.S(); // Singular values (vector of length k)
                        let vrt = svd.V().transpose(); // Right singular vectors transposed (k × k)

                        // Determine new target rank based on cumulative sum of squares
                        let sigma_vec: Vec<f64> = sr.column_vector().iter().cloned().collect();
                        let new_rank = aca::calculate_singular_values_cutoff(sigma_vec, &epsilon);

                        // Recompress: U = Q_u * U_r * diag(sigma), V = V_r^T * Q_v^T
                        let truncated_u = ur.subcols(0, new_rank).to_owned();
                        let truncated_vt = sr.column_vector().subrows(0, new_rank).as_diagonal()
                            * vrt.subrows(0, new_rank);

                        u_level.insert(i, truncated_u);
                        vt_level.insert(i, truncated_vt);
                    }
                    M2LCompressionType::None => {
                        // Create full-size M2L reference operators.
                        let k_values = utils::get_a_matrix(
                            &source_points,
                            &target_points,
                            kernel,
                        );

                        u_level.insert(i, k_values);
                    }
                }
            }

            (level, u_level, vt_level)
        })
        .collect();

    // Aggregate the results into the final HashMaps
    let mut truncated_u: HashMap<usize, HashMap<usize, Mat<f64>>> = HashMap::new();
    let mut truncated_vt: HashMap<usize, HashMap<usize, Mat<f64>>> = HashMap::new();

    for (level, u_map, vt_map) in level_operators {
        truncated_u.insert(level, u_map);
        truncated_vt.insert(level, vt_map);
    }

    PrecomputeOperators {
        num_nodes_nd,
        nodes_nd,
        polynomial_nodes,
        m2m_transfer_matrices,
        u: truncated_u,
        vt: truncated_vt,
        permutation_indices,
        inverse_permutations,
        permutation_lookups,
        reference_vector_lookups,
    }
}

/// Calculates the coefficients to map the points in a cell to the Chebyshev nodes of the cell.
/// # Arguments
///
/// * `interpolation_order` - Number of Chebyshev nodes per axis (e.g. `p`).
/// * `cell_point_locations` - Input points to interpolate (modified in place; shape `N × d`).
/// * `center` - Center coordinates of the cell in `d` dimensions.
/// * `length` - Side length of the cell (assumes cubic cells).
/// * `polynomial_nodes` - Evaluation matrix of Chebyshev polynomials (shape `p × p`).
/// * `dimensions` - Number of spatial dimensions (`d`).
///
/// # Returns
///
/// A matrix of interpolation coefficients (`N × p^d`), where each row contains
/// the tensor-product weights used to interpolate the corresponding input point
/// onto the Chebyshev basis of the cell.
pub fn get_approximation_coefficients(
    interpolation_order: &usize,
    cell_point_locations: &mut Mat<f64>,
    center: &Vec<f64>,
    length: &f64,
    polynomial_nodes: &Mat<f64>,
    dimensions: &usize,
) -> Mat<f64> {
    // Scale points to the [-1, 1]^d hypercube.
    cell_point_locations.row_iter_mut().for_each(|row| {
        row.iter_mut().enumerate().for_each(|(idx, element)| {
            *element = (*element - center[idx]) / (length * 0.5);
        });
    });

    let mut one_d_transfer_coefficients: Vec<Mat<f64>> = Vec::new();

    for d in 0..*dimensions as usize {
        let column_vec = cell_point_locations.col_as_slice(d).to_vec();
        let tn_x = evaluate_chebyshev_polynomials(
            &interpolation_order,
            &cell_point_locations.nrows(),
            &column_vec,
        );
        let sn = calculate_sn(tn_x, &polynomial_nodes, &interpolation_order);

        one_d_transfer_coefficients.push(sn);
    }

    let num_columns = interpolation_order.pow(*dimensions as u32);
    let indices: Vec<usize> = (0..*interpolation_order).into_iter().collect();

    let tensor_product_indices =
        utils::cartesian_product::<usize>(&indices, *dimensions);

    let mut transfer_coefficients = Mat::<f64>::zeros(cell_point_locations.nrows(), num_columns);

    transfer_coefficients
        .row_iter_mut()
        .enumerate()
        .for_each(|(i, row)| {
            row.iter_mut().enumerate().for_each(|(idx, element)| {
                *element = 1.0;

                let wanted_indices = tensor_product_indices.row(idx);

                for (d, coeffs) in one_d_transfer_coefficients.iter().enumerate() {
                    let poly_idx = wanted_indices[d];
                    *element *= coeffs[(i, poly_idx)];
                }
            });
        });

    transfer_coefficients
}

/// Scales Chebyshev nodes from the reference domain `[-1, 1]^d`
/// into the physical coordinates of a given FMM cell.
///
/// # Arguments
///
/// * `nodes_nd` - Tensor-product grid of Chebyshev nodes in `[-1, 1]^d` (shape `p^d × d`)
/// * `cell_center` - Coordinates of the center of the destination cell (length `d`)
/// * `cell_length` - Side length of the destination cell
///
/// # Returns
///
/// A new matrix of scaled Chebyshev node coordinates, transformed to lie within
/// the specified cell bounds (shape `p^d × d`).
pub fn scale_cheb_nodes_to_cell(
    nodes_nd: &Mat<f64>,
    cell_center: &Vec<f64>,
    cell_length: &f64,
) -> Mat<f64> {
    let nodes_nd_shape = nodes_nd.shape();

    let mut scaled_cheb_nodes = Mat::<f64>::zeros(nodes_nd_shape.0, nodes_nd_shape.1);

    nodes_nd.row_iter().enumerate().for_each(|(row_idx, row)| {
        row.iter().enumerate().for_each(|(col_idx, element)| {
            let scaled_element = cell_center[col_idx] + (cell_length * 0.5) * element;
            scaled_cheb_nodes[(row_idx, col_idx)] = scaled_element;
        });
    });

    scaled_cheb_nodes
}