fdars_core/
utility.rs

1//! Utility functions for functional data analysis.
2
3use crate::helpers::simpsons_weights;
4use crate::iter_maybe_parallel;
5#[cfg(feature = "parallel")]
6use rayon::iter::ParallelIterator;
7use std::f64::consts::PI;
8
9/// Compute Simpson's rule integration for a single function.
10///
11/// # Arguments
12/// * `values` - Function values at evaluation points
13/// * `argvals` - Evaluation points
14pub fn integrate_simpson(values: &[f64], argvals: &[f64]) -> f64 {
15    if values.len() != argvals.len() || values.is_empty() {
16        return 0.0;
17    }
18
19    let weights = simpsons_weights(argvals);
20    values
21        .iter()
22        .zip(weights.iter())
23        .map(|(&v, &w)| v * w)
24        .sum()
25}
26
27/// Compute inner product between two functional data curves.
28///
29/// # Arguments
30/// * `curve1` - First curve values
31/// * `curve2` - Second curve values
32/// * `argvals` - Evaluation points
33pub fn inner_product(curve1: &[f64], curve2: &[f64], argvals: &[f64]) -> f64 {
34    if curve1.len() != curve2.len() || curve1.len() != argvals.len() || curve1.is_empty() {
35        return 0.0;
36    }
37
38    let weights = simpsons_weights(argvals);
39    curve1
40        .iter()
41        .zip(curve2.iter())
42        .zip(weights.iter())
43        .map(|((&c1, &c2), &w)| c1 * c2 * w)
44        .sum()
45}
46
47/// Compute inner product matrix for functional data.
48///
49/// # Arguments
50/// * `data` - Column-major matrix (n x m)
51/// * `n` - Number of observations
52/// * `m` - Number of evaluation points
53/// * `argvals` - Evaluation points
54///
55/// # Returns
56/// Symmetric inner product matrix (n x n), column-major
57pub fn inner_product_matrix(data: &[f64], n: usize, m: usize, argvals: &[f64]) -> Vec<f64> {
58    if n == 0 || m == 0 || argvals.len() != m || data.len() != n * m {
59        return Vec::new();
60    }
61
62    let weights = simpsons_weights(argvals);
63
64    // Compute upper triangle (parallel when feature enabled)
65    let upper_triangle: Vec<(usize, usize, f64)> = iter_maybe_parallel!(0..n)
66        .flat_map(|i| {
67            (i..n)
68                .map(|j| {
69                    let mut ip = 0.0;
70                    for k in 0..m {
71                        ip += data[i + k * n] * data[j + k * n] * weights[k];
72                    }
73                    (i, j, ip)
74                })
75                .collect::<Vec<_>>()
76        })
77        .collect();
78
79    // Build symmetric matrix
80    let mut result = vec![0.0; n * n];
81    for (i, j, ip) in upper_triangle {
82        result[i + j * n] = ip;
83        result[j + i * n] = ip;
84    }
85
86    result
87}
88
89/// Compute the Adot matrix used in PCvM statistic.
90pub fn compute_adot(n: usize, inprod: &[f64]) -> Vec<f64> {
91    if n == 0 {
92        return Vec::new();
93    }
94
95    let expected_len = (n * n + n) / 2;
96    if inprod.len() != expected_len {
97        return Vec::new();
98    }
99
100    let out_len = (n * n - n + 2) / 2;
101    let mut adot_vec = vec![0.0; out_len];
102
103    adot_vec[0] = PI * (n + 1) as f64;
104
105    // Collect all (i, j) pairs for parallel processing
106    let pairs: Vec<(usize, usize)> = (2..=n).flat_map(|i| (1..i).map(move |j| (i, j))).collect();
107
108    // Compute adot values (parallel when feature enabled)
109    let results: Vec<(usize, f64)> = iter_maybe_parallel!(pairs)
110        .map(|(i, j)| {
111            let mut sumr = 0.0;
112
113            for r in 1..=n {
114                if i == r || j == r {
115                    sumr += PI;
116                } else {
117                    let auxi = i * (i - 1) / 2;
118                    let auxj = j * (j - 1) / 2;
119                    let auxr = r * (r - 1) / 2;
120
121                    let ij = auxi + j - 1;
122                    let ii = auxi + i - 1;
123                    let jj = auxj + j - 1;
124                    let rr = auxr + r - 1;
125
126                    let ir = if i > r { auxi + r - 1 } else { auxr + i - 1 };
127                    let rj = if r > j { auxr + j - 1 } else { auxj + r - 1 };
128                    let jr = rj;
129
130                    let num = inprod[ij] - inprod[ir] - inprod[rj] + inprod[rr];
131                    let aux1 = (inprod[ii] - 2.0 * inprod[ir] + inprod[rr]).sqrt();
132                    let aux2 = (inprod[jj] - 2.0 * inprod[jr] + inprod[rr]).sqrt();
133                    let den = aux1 * aux2;
134
135                    let mut quo = if den.abs() > 1e-10 { num / den } else { 0.0 };
136                    quo = quo.clamp(-1.0, 1.0);
137
138                    sumr += (PI - quo.acos()).abs();
139                }
140            }
141
142            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
143            (idx, sumr)
144        })
145        .collect();
146
147    // Fill in the results
148    for (idx, val) in results {
149        if idx < adot_vec.len() {
150            adot_vec[idx] = val;
151        }
152    }
153
154    adot_vec
155}
156
157/// Compute the PCvM statistic.
158pub fn pcvm_statistic(adot_vec: &[f64], residuals: &[f64]) -> f64 {
159    let n = residuals.len();
160
161    if n == 0 || adot_vec.is_empty() {
162        return 0.0;
163    }
164
165    let mut sums = 0.0;
166    for i in 2..=n {
167        for j in 1..i {
168            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
169            if idx < adot_vec.len() {
170                sums += residuals[i - 1] * adot_vec[idx] * residuals[j - 1];
171            }
172        }
173    }
174
175    let diag_sum: f64 = residuals.iter().map(|r| r * r).sum();
176    adot_vec[0] * diag_sum + 2.0 * sums
177}
178
179/// Result of random projection statistics.
180pub struct RpStatResult {
181    /// CvM statistics for each projection
182    pub cvm: Vec<f64>,
183    /// KS statistics for each projection
184    pub ks: Vec<f64>,
185}
186
187/// Compute random projection statistics.
188pub fn rp_stat(proj_x_ord: &[i32], residuals: &[f64], n_proj: usize) -> RpStatResult {
189    let n = residuals.len();
190
191    if n == 0 || n_proj == 0 || proj_x_ord.len() != n * n_proj {
192        return RpStatResult {
193            cvm: Vec::new(),
194            ks: Vec::new(),
195        };
196    }
197
198    // Process projections (parallel when feature enabled)
199    let stats: Vec<(f64, f64)> = iter_maybe_parallel!(0..n_proj)
200        .map(|p| {
201            let mut y = vec![0.0; n];
202            let mut cumsum = 0.0;
203
204            for i in 0..n {
205                let idx = proj_x_ord[p * n + i] as usize;
206                if idx > 0 && idx <= n {
207                    cumsum += residuals[idx - 1];
208                }
209                y[i] = cumsum;
210            }
211
212            let sum_y_sq: f64 = y.iter().map(|yi| yi * yi).sum();
213            let cvm = sum_y_sq / (n * n) as f64;
214
215            let max_abs_y = y.iter().map(|yi| yi.abs()).fold(0.0, f64::max);
216            let ks = max_abs_y / (n as f64).sqrt();
217
218            (cvm, ks)
219        })
220        .collect();
221
222    let cvm_stats: Vec<f64> = stats.iter().map(|(cvm, _)| *cvm).collect();
223    let ks_stats: Vec<f64> = stats.iter().map(|(_, ks)| *ks).collect();
224
225    RpStatResult {
226        cvm: cvm_stats,
227        ks: ks_stats,
228    }
229}
230
231/// k-NN prediction for functional regression.
232pub fn knn_predict(
233    distance_matrix: &[f64],
234    y: &[f64],
235    n_train: usize,
236    n_test: usize,
237    k: usize,
238) -> Vec<f64> {
239    if n_train == 0 || n_test == 0 || k == 0 || y.len() != n_train {
240        return vec![0.0; n_test];
241    }
242
243    let k = k.min(n_train);
244
245    iter_maybe_parallel!(0..n_test)
246        .map(|i| {
247            // Get distances from test point i to all training points
248            let mut distances: Vec<(usize, f64)> = (0..n_train)
249                .map(|j| (j, distance_matrix[i + j * n_test]))
250                .collect();
251
252            // Sort by distance
253            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
254
255            // Average of k nearest neighbors
256            let sum: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum();
257            sum / k as f64
258        })
259        .collect()
260}
261
262/// Compute leave-one-out cross-validation error for k-NN.
263pub fn knn_loocv(distance_matrix: &[f64], y: &[f64], n: usize, k: usize) -> f64 {
264    if n == 0 || k == 0 || y.len() != n || distance_matrix.len() != n * n {
265        return f64::INFINITY;
266    }
267
268    let k = k.min(n - 1);
269
270    let errors: Vec<f64> = iter_maybe_parallel!(0..n)
271        .map(|i| {
272            // Get distances from point i to all other points
273            let mut distances: Vec<(usize, f64)> = (0..n)
274                .filter(|&j| j != i)
275                .map(|j| (j, distance_matrix[i + j * n]))
276                .collect();
277
278            // Sort by distance
279            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
280
281            // Prediction
282            let pred: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum::<f64>() / k as f64;
283
284            // Squared error
285            (y[i] - pred).powi(2)
286        })
287        .collect();
288
289    errors.iter().sum::<f64>() / n as f64
290}
291
292#[cfg(test)]
293mod tests {
294    use super::*;
295
296    fn uniform_grid(n: usize) -> Vec<f64> {
297        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
298    }
299
300    #[test]
301    fn test_integrate_simpson_constant() {
302        let argvals = uniform_grid(11);
303        let values = vec![1.0; 11];
304        let result = integrate_simpson(&values, &argvals);
305        assert!((result - 1.0).abs() < 1e-10);
306    }
307
308    #[test]
309    fn test_inner_product_orthogonal() {
310        let argvals = uniform_grid(101);
311        let curve1: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();
312        let curve2: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).cos()).collect();
313        let result = inner_product(&curve1, &curve2, &argvals);
314        assert!(result.abs() < 0.01);
315    }
316
317    #[test]
318    fn test_inner_product_matrix_symmetry() {
319        let n = 5;
320        let m = 10;
321        let argvals = uniform_grid(m);
322        let data: Vec<f64> = (0..n * m).map(|i| (i as f64).sin()).collect();
323
324        let matrix = inner_product_matrix(&data, n, m, &argvals);
325
326        for i in 0..n {
327            for j in 0..n {
328                let diff = (matrix[i + j * n] - matrix[j + i * n]).abs();
329                assert!(diff < 1e-10, "Matrix should be symmetric");
330            }
331        }
332    }
333
334    #[test]
335    fn test_knn_predict() {
336        let n_train = 10;
337        let n_test = 3;
338        let k = 3;
339
340        let mut distance_matrix = vec![0.0; n_test * n_train];
341        for i in 0..n_test {
342            for j in 0..n_train {
343                distance_matrix[i + j * n_test] = ((i as f64) - (j as f64)).abs();
344            }
345        }
346
347        let y: Vec<f64> = (0..n_train).map(|i| i as f64).collect();
348        let predictions = knn_predict(&distance_matrix, &y, n_train, n_test, k);
349
350        assert_eq!(predictions.len(), n_test);
351    }
352}