fdars_core/
utility.rs

1//! Utility functions for functional data analysis.
2
3use crate::helpers::simpsons_weights;
4use rayon::prelude::*;
5use std::f64::consts::PI;
6
7/// Compute Simpson's rule integration for a single function.
8///
9/// # Arguments
10/// * `values` - Function values at evaluation points
11/// * `argvals` - Evaluation points
12pub fn integrate_simpson(values: &[f64], argvals: &[f64]) -> f64 {
13    if values.len() != argvals.len() || values.is_empty() {
14        return 0.0;
15    }
16
17    let weights = simpsons_weights(argvals);
18    values
19        .iter()
20        .zip(weights.iter())
21        .map(|(&v, &w)| v * w)
22        .sum()
23}
24
25/// Compute inner product between two functional data curves.
26///
27/// # Arguments
28/// * `curve1` - First curve values
29/// * `curve2` - Second curve values
30/// * `argvals` - Evaluation points
31pub fn inner_product(curve1: &[f64], curve2: &[f64], argvals: &[f64]) -> f64 {
32    if curve1.len() != curve2.len() || curve1.len() != argvals.len() || curve1.is_empty() {
33        return 0.0;
34    }
35
36    let weights = simpsons_weights(argvals);
37    curve1
38        .iter()
39        .zip(curve2.iter())
40        .zip(weights.iter())
41        .map(|((&c1, &c2), &w)| c1 * c2 * w)
42        .sum()
43}
44
45/// Compute inner product matrix for functional data.
46///
47/// # Arguments
48/// * `data` - Column-major matrix (n x m)
49/// * `n` - Number of observations
50/// * `m` - Number of evaluation points
51/// * `argvals` - Evaluation points
52///
53/// # Returns
54/// Symmetric inner product matrix (n x n), column-major
55pub fn inner_product_matrix(data: &[f64], n: usize, m: usize, argvals: &[f64]) -> Vec<f64> {
56    if n == 0 || m == 0 || argvals.len() != m || data.len() != n * m {
57        return Vec::new();
58    }
59
60    let weights = simpsons_weights(argvals);
61
62    // Compute upper triangle in parallel
63    let upper_triangle: Vec<(usize, usize, f64)> = (0..n)
64        .into_par_iter()
65        .flat_map(|i| {
66            (i..n)
67                .map(|j| {
68                    let mut ip = 0.0;
69                    for k in 0..m {
70                        ip += data[i + k * n] * data[j + k * n] * weights[k];
71                    }
72                    (i, j, ip)
73                })
74                .collect::<Vec<_>>()
75        })
76        .collect();
77
78    // Build symmetric matrix
79    let mut result = vec![0.0; n * n];
80    for (i, j, ip) in upper_triangle {
81        result[i + j * n] = ip;
82        result[j + i * n] = ip;
83    }
84
85    result
86}
87
/// Compute the Adot matrix used in PCvM statistic.
///
/// # Arguments
/// * `n` - Number of observations
/// * `inprod` - Packed lower triangle (including diagonal) of the n x n
///   inner product matrix; must have length (n^2 + n) / 2.
///
/// # Returns
/// Packed vector of length (n^2 - n + 2) / 2: slot 0 holds the common
/// diagonal value pi * (n + 1); slot 1 + (i-1)(i-2)/2 + (j-1) holds the
/// value for the (1-based) pair i > j. Empty on inconsistent input.
pub fn compute_adot(n: usize, inprod: &[f64]) -> Vec<f64> {
    if n == 0 {
        return Vec::new();
    }

    // `inprod` must be exactly the packed lower triangle of an n x n matrix.
    let expected_len = (n * n + n) / 2;
    if inprod.len() != expected_len {
        return Vec::new();
    }

    // One slot for the shared diagonal plus one per strict lower-triangle pair.
    let out_len = (n * n - n + 2) / 2;
    let mut adot_vec = vec![0.0; out_len];

    // Common diagonal entry of Adot.
    adot_vec[0] = PI * (n + 1) as f64;

    // Collect all (i, j) pairs (1-based, i > j) for parallel processing
    let pairs: Vec<(usize, usize)> = (2..=n).flat_map(|i| (1..i).map(move |j| (i, j))).collect();

    // Compute adot values in parallel
    let results: Vec<(usize, f64)> = pairs
        .into_par_iter()
        .map(|(i, j)| {
            let mut sumr = 0.0;

            // Accumulate, over every observation r, the angle-based
            // contribution of the pair (i, j) as seen from r.
            for r in 1..=n {
                if i == r || j == r {
                    // Degenerate vertex: r coincides with one endpoint.
                    sumr += PI;
                } else {
                    // Row offsets of i, j, r inside the packed lower triangle.
                    let auxi = i * (i - 1) / 2;
                    let auxj = j * (j - 1) / 2;
                    let auxr = r * (r - 1) / 2;

                    // Packed indices for the diagonal and (i, j) entries.
                    let ij = auxi + j - 1;
                    let ii = auxi + i - 1;
                    let jj = auxj + j - 1;
                    let rr = auxr + r - 1;

                    // Unordered pairs are addressed through the larger row.
                    let ir = if i > r { auxi + r - 1 } else { auxr + i - 1 };
                    let rj = if r > j { auxr + j - 1 } else { auxj + r - 1 };
                    let jr = rj;

                    // Cosine of the angle between (x_i - x_r) and (x_j - x_r),
                    // expressed purely through inner products.
                    let num = inprod[ij] - inprod[ir] - inprod[rj] + inprod[rr];
                    let aux1 = (inprod[ii] - 2.0 * inprod[ir] + inprod[rr]).sqrt();
                    let aux2 = (inprod[jj] - 2.0 * inprod[jr] + inprod[rr]).sqrt();
                    let den = aux1 * aux2;

                    // Guard against a ~zero denominator, then clamp into the
                    // valid acos domain to absorb rounding error.
                    let mut quo = if den.abs() > 1e-10 { num / den } else { 0.0 };
                    quo = quo.clamp(-1.0, 1.0);

                    sumr += (PI - quo.acos()).abs();
                }
            }

            // Packed output slot for this (i, j) pair (slot 0 is the diagonal).
            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
            (idx, sumr)
        })
        .collect();

    // Fill in the results
    for (idx, val) in results {
        if idx < adot_vec.len() {
            adot_vec[idx] = val;
        }
    }

    adot_vec
}
156
/// Compute the PCvM statistic.
///
/// # Arguments
/// * `adot_vec` - Packed Adot vector as produced by `compute_adot`
/// * `residuals` - Model residuals (length n)
///
/// Returns 0.0 when either input is empty.
pub fn pcvm_statistic(adot_vec: &[f64], residuals: &[f64]) -> f64 {
    let n = residuals.len();

    if n == 0 || adot_vec.is_empty() {
        return 0.0;
    }

    // Off-diagonal contribution over the strict lower triangle (i > j,
    // 1-based); out-of-range packed indices are skipped.
    let mut off_diag = 0.0;
    for i in 2..=n {
        let row_base = 1 + (i - 1) * (i - 2) / 2;
        for j in 1..i {
            let idx = row_base + j - 1;
            if idx < adot_vec.len() {
                off_diag += residuals[i - 1] * adot_vec[idx] * residuals[j - 1];
            }
        }
    }

    // Diagonal contribution: every diagonal Adot entry equals adot_vec[0].
    let diag: f64 = residuals.iter().map(|r| r * r).sum();
    adot_vec[0] * diag + 2.0 * off_diag
}
178
/// Result of random projection statistics.
///
/// Both vectors have one entry per projection, in projection order.
pub struct RpStatResult {
    /// Cramér–von Mises statistics for each projection
    pub cvm: Vec<f64>,
    /// Kolmogorov–Smirnov statistics for each projection
    pub ks: Vec<f64>,
}
186
187/// Compute random projection statistics.
188pub fn rp_stat(proj_x_ord: &[i32], residuals: &[f64], n_proj: usize) -> RpStatResult {
189    let n = residuals.len();
190
191    if n == 0 || n_proj == 0 || proj_x_ord.len() != n * n_proj {
192        return RpStatResult {
193            cvm: Vec::new(),
194            ks: Vec::new(),
195        };
196    }
197
198    // Process projections in parallel
199    let stats: Vec<(f64, f64)> = (0..n_proj)
200        .into_par_iter()
201        .map(|p| {
202            let mut y = vec![0.0; n];
203            let mut cumsum = 0.0;
204
205            for i in 0..n {
206                let idx = proj_x_ord[p * n + i] as usize;
207                if idx > 0 && idx <= n {
208                    cumsum += residuals[idx - 1];
209                }
210                y[i] = cumsum;
211            }
212
213            let sum_y_sq: f64 = y.iter().map(|yi| yi * yi).sum();
214            let cvm = sum_y_sq / (n * n) as f64;
215
216            let max_abs_y = y.iter().map(|yi| yi.abs()).fold(0.0, f64::max);
217            let ks = max_abs_y / (n as f64).sqrt();
218
219            (cvm, ks)
220        })
221        .collect();
222
223    let cvm_stats: Vec<f64> = stats.iter().map(|(cvm, _)| *cvm).collect();
224    let ks_stats: Vec<f64> = stats.iter().map(|(_, ks)| *ks).collect();
225
226    RpStatResult {
227        cvm: cvm_stats,
228        ks: ks_stats,
229    }
230}
231
232/// k-NN prediction for functional regression.
233pub fn knn_predict(
234    distance_matrix: &[f64],
235    y: &[f64],
236    n_train: usize,
237    n_test: usize,
238    k: usize,
239) -> Vec<f64> {
240    if n_train == 0 || n_test == 0 || k == 0 || y.len() != n_train {
241        return vec![0.0; n_test];
242    }
243
244    let k = k.min(n_train);
245
246    (0..n_test)
247        .into_par_iter()
248        .map(|i| {
249            // Get distances from test point i to all training points
250            let mut distances: Vec<(usize, f64)> = (0..n_train)
251                .map(|j| (j, distance_matrix[i + j * n_test]))
252                .collect();
253
254            // Sort by distance
255            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
256
257            // Average of k nearest neighbors
258            let sum: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum();
259            sum / k as f64
260        })
261        .collect()
262}
263
264/// Compute leave-one-out cross-validation error for k-NN.
265pub fn knn_loocv(distance_matrix: &[f64], y: &[f64], n: usize, k: usize) -> f64 {
266    if n == 0 || k == 0 || y.len() != n || distance_matrix.len() != n * n {
267        return f64::INFINITY;
268    }
269
270    let k = k.min(n - 1);
271
272    let errors: Vec<f64> = (0..n)
273        .into_par_iter()
274        .map(|i| {
275            // Get distances from point i to all other points
276            let mut distances: Vec<(usize, f64)> = (0..n)
277                .filter(|&j| j != i)
278                .map(|j| (j, distance_matrix[i + j * n]))
279                .collect();
280
281            // Sort by distance
282            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
283
284            // Prediction
285            let pred: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum::<f64>() / k as f64;
286
287            // Squared error
288            (y[i] - pred).powi(2)
289        })
290        .collect();
291
292    errors.iter().sum::<f64>() / n as f64
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    // Evenly spaced grid on [0, 1] with n points.
    // NOTE(review): divides by n - 1, so callers must pass n >= 2 — all
    // current tests do.
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // ============== Integration tests ==============

    #[test]
    fn test_integrate_simpson_constant() {
        // Integral of constant 2 over [0, 1] should be 2
        let argvals = uniform_grid(21);
        let values: Vec<f64> = vec![2.0; 21];

        let result = integrate_simpson(&values, &argvals);
        assert!(
            (result - 2.0).abs() < 0.01,
            "Integral of constant 2 should be 2, got {}",
            result
        );
    }

    #[test]
    fn test_integrate_simpson_linear() {
        // Integral of x over [0, 1] should be 0.5
        let argvals = uniform_grid(51);
        let values: Vec<f64> = argvals.clone();

        let result = integrate_simpson(&values, &argvals);
        assert!(
            (result - 0.5).abs() < 0.01,
            "Integral of x should be 0.5, got {}",
            result
        );
    }

    #[test]
    fn test_integrate_simpson_invalid() {
        // Empty and mismatched-length inputs fall back to 0.0.
        assert!(integrate_simpson(&[], &[]).abs() < 1e-10);
        assert!(integrate_simpson(&[1.0, 2.0], &[0.0]).abs() < 1e-10);
    }

    // ============== Inner product tests ==============

    #[test]
    fn test_inner_product_orthogonal() {
        let argvals = uniform_grid(101);
        // sin(2*pi*x) and sin(4*pi*x) are orthogonal on [0,1]
        let curve1: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();
        let curve2: Vec<f64> = argvals.iter().map(|&t| (4.0 * PI * t).sin()).collect();

        let ip = inner_product(&curve1, &curve2, &argvals);
        assert!(
            ip.abs() < 0.05,
            "Orthogonal functions should have near-zero inner product, got {}",
            ip
        );
    }

    #[test]
    fn test_inner_product_self_positive() {
        let argvals = uniform_grid(51);
        let curve: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();

        let ip = inner_product(&curve, &curve, &argvals);
        assert!(ip > 0.0, "Self inner product should be positive");
        // Integral of sin^2(2*pi*x) over [0,1] is 0.5
        assert!((ip - 0.5).abs() < 0.05, "Expected ~0.5, got {}", ip);
    }

    #[test]
    fn test_inner_product_invalid() {
        // Empty and mismatched-length inputs fall back to 0.0.
        assert!(inner_product(&[], &[], &[]).abs() < 1e-10);
        assert!(inner_product(&[1.0], &[1.0, 2.0], &[0.0]).abs() < 1e-10);
    }

    // ============== Inner product matrix tests ==============

    #[test]
    fn test_inner_product_matrix_symmetric() {
        let n = 5;
        let m = 21;
        let argvals = uniform_grid(m);

        // Create simple test data: curve i is (i + 1) * t, stored column-major.
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                data[i + j * n] = (i as f64 + 1.0) * argvals[j];
            }
        }

        let matrix = inner_product_matrix(&data, n, m, &argvals);
        assert_eq!(matrix.len(), n * n);

        // Check symmetry
        for i in 0..n {
            for j in 0..n {
                assert!(
                    (matrix[i + j * n] - matrix[j + i * n]).abs() < 1e-10,
                    "Matrix should be symmetric"
                );
            }
        }
    }

    #[test]
    fn test_inner_product_matrix_diagonal_positive() {
        let n = 3;
        let m = 21;
        let argvals = uniform_grid(m);

        // Same test data as above: curve i is (i + 1) * t.
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                data[i + j * n] = (i as f64 + 1.0) * argvals[j];
            }
        }

        let matrix = inner_product_matrix(&data, n, m, &argvals);

        // Diagonal elements should be positive
        for i in 0..n {
            assert!(matrix[i + i * n] > 0.0, "Diagonal should be positive");
        }
    }

    #[test]
    fn test_inner_product_matrix_invalid() {
        // Degenerate dimensions produce an empty result.
        let result = inner_product_matrix(&[], 0, 0, &[]);
        assert!(result.is_empty());
    }

    // ============== PCvM statistic tests ==============

    #[test]
    fn test_pcvm_statistic_zero_residuals() {
        let n = 5;
        // Packed lower-triangle length expected by compute_adot.
        let inprod_len = (n * n + n) / 2;
        let inprod: Vec<f64> = (0..inprod_len).map(|i| i as f64 * 0.1).collect();
        let adot = compute_adot(n, &inprod);

        let residuals = vec![0.0; n];
        let stat = pcvm_statistic(&adot, &residuals);

        assert!(
            stat.abs() < 1e-10,
            "Zero residuals should give zero statistic"
        );
    }

    #[test]
    fn test_pcvm_statistic_positive() {
        let n = 5;
        // Strictly positive packed inner products keep the sqrt terms real.
        let inprod_len = (n * n + n) / 2;
        let inprod: Vec<f64> = (0..inprod_len).map(|i| (i as f64 * 0.1).max(0.1)).collect();
        let adot = compute_adot(n, &inprod);

        let residuals = vec![1.0, -0.5, 0.3, -0.2, 0.4];
        let stat = pcvm_statistic(&adot, &residuals);

        assert!(stat.is_finite(), "Statistic should be finite");
    }

    // ============== k-NN prediction tests ==============

    #[test]
    fn test_knn_predict_k1() {
        // Simple case: k=1 returns nearest neighbor's value
        let n_train = 3;
        let n_test = 2;
        let y = vec![1.0, 2.0, 3.0];

        // Distance matrix layout: [n_test x n_train] column-major
        // Element [i + j * n_test] = distance from test i to train j
        let distance_matrix = vec![
            0.1, 0.9, // dist(test0, train0), dist(test1, train0)
            0.5, 0.2, // dist(test0, train1), dist(test1, train1)
            0.8, 0.1, // dist(test0, train2), dist(test1, train2)
        ];

        let predictions = knn_predict(&distance_matrix, &y, n_train, n_test, 1);

        assert_eq!(predictions.len(), 2);
        assert!(
            (predictions[0] - 1.0).abs() < 1e-10,
            "Test 0 nearest to train 0"
        );
        assert!(
            (predictions[1] - 3.0).abs() < 1e-10,
            "Test 1 nearest to train 2"
        );
    }

    #[test]
    fn test_knn_predict_k_all() {
        // k = n_train should return mean of all training values
        let n_train = 4;
        let n_test = 1;
        let y = vec![1.0, 2.0, 3.0, 4.0];
        let expected_mean = 2.5;

        let distance_matrix = vec![0.1, 0.2, 0.3, 0.4]; // arbitrary distances

        let predictions = knn_predict(&distance_matrix, &y, n_train, n_test, n_train);

        assert!(
            (predictions[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    #[test]
    fn test_knn_predict_invalid() {
        // Invalid input yields one zero per requested test point.
        let result = knn_predict(&[], &[], 0, 1, 1);
        assert_eq!(result, vec![0.0]);
    }

    // ============== k-NN LOOCV tests ==============

    #[test]
    fn test_knn_loocv_returns_finite() {
        let n = 5;
        let y = vec![1.0, 2.0, 1.5, 2.5, 1.8];

        // Create distance matrix (symmetric with zero diagonal)
        let mut distance_matrix = vec![0.0; n * n];
        for i in 0..n {
            for j in 0..n {
                let dist = ((i as f64) - (j as f64)).abs();
                distance_matrix[i + j * n] = dist;
            }
        }

        let error = knn_loocv(&distance_matrix, &y, n, 2);

        assert!(error.is_finite(), "LOOCV error should be finite");
        assert!(error >= 0.0, "LOOCV error should be non-negative");
    }

    #[test]
    fn test_knn_loocv_invalid() {
        // Degenerate n and k both signal invalidity via infinity.
        let result = knn_loocv(&[], &[], 0, 1);
        assert!(result.is_infinite());

        let result = knn_loocv(&[0.0; 4], &[1.0, 2.0], 2, 0);
        assert!(result.is_infinite());
    }
}