// fdars_core/utility.rs
1//! Utility functions for functional data analysis.
2
3use crate::helpers::simpsons_weights;
4use crate::iter_maybe_parallel;
5use crate::matrix::FdMatrix;
6#[cfg(feature = "parallel")]
7use rayon::iter::ParallelIterator;
8use std::f64::consts::PI;
9
10/// Compute Simpson's rule integration for a single function.
11///
12/// # Arguments
13/// * `values` - Function values at evaluation points
14/// * `argvals` - Evaluation points
15pub fn integrate_simpson(values: &[f64], argvals: &[f64]) -> f64 {
16    if values.len() != argvals.len() || values.is_empty() {
17        return 0.0;
18    }
19
20    let weights = simpsons_weights(argvals);
21    values
22        .iter()
23        .zip(weights.iter())
24        .map(|(&v, &w)| v * w)
25        .sum()
26}
27
28/// Compute inner product between two functional data curves.
29///
30/// # Arguments
31/// * `curve1` - First curve values
32/// * `curve2` - Second curve values
33/// * `argvals` - Evaluation points
34pub fn inner_product(curve1: &[f64], curve2: &[f64], argvals: &[f64]) -> f64 {
35    if curve1.len() != curve2.len() || curve1.len() != argvals.len() || curve1.is_empty() {
36        return 0.0;
37    }
38
39    let weights = simpsons_weights(argvals);
40    curve1
41        .iter()
42        .zip(curve2.iter())
43        .zip(weights.iter())
44        .map(|((&c1, &c2), &w)| c1 * c2 * w)
45        .sum()
46}
47
48/// Compute inner product matrix for functional data.
49///
50/// # Arguments
51/// * `data` - Matrix of observations (n rows) x evaluation points (m cols)
52/// * `argvals` - Evaluation points
53///
54/// # Returns
55/// Symmetric inner product matrix (n x n)
56pub fn inner_product_matrix(data: &FdMatrix, argvals: &[f64]) -> FdMatrix {
57    let n = data.nrows();
58    let m = data.ncols();
59
60    if n == 0 || m == 0 || argvals.len() != m {
61        return FdMatrix::zeros(0, 0);
62    }
63
64    let weights = simpsons_weights(argvals);
65
66    // Compute upper triangle (parallel when feature enabled)
67    let upper_triangle: Vec<(usize, usize, f64)> = iter_maybe_parallel!(0..n)
68        .flat_map(|i| {
69            (i..n)
70                .map(|j| {
71                    let mut ip = 0.0;
72                    for k in 0..m {
73                        ip += data[(i, k)] * data[(j, k)] * weights[k];
74                    }
75                    (i, j, ip)
76                })
77                .collect::<Vec<_>>()
78        })
79        .collect();
80
81    // Build symmetric matrix
82    let mut result = FdMatrix::zeros(n, n);
83    for (i, j, ip) in upper_triangle {
84        result[(i, j)] = ip;
85        result[(j, i)] = ip;
86    }
87
88    result
89}
90
/// Packed symmetric index for 1-based indices in lower-triangular storage.
///
/// Maps the unordered pair `{a, b}` (both 1-based) to its position in a
/// row-major lower-triangular packed array laid out as
/// (1,1), (2,1), (2,2), (3,1), (3,2), (3,3), ...
///
/// The previous first doc line ("Compute the Adot matrix...") described a
/// different function and has been removed.
fn packed_sym_index(a: usize, b: usize) -> usize {
    // 1-based contract: a 0 index would underflow `hi * (hi - 1)` below.
    debug_assert!(a >= 1 && b >= 1, "packed_sym_index expects 1-based indices");
    // Order the pair so that `hi >= lo`; storage is symmetric in (a, b).
    let (hi, lo) = if a >= b { (a, b) } else { (b, a) };
    hi * (hi - 1) / 2 + lo - 1
}
97
98/// Compute the angular distance sum for a single (i, j) pair over all reference points.
99fn adot_pair_sum(inprod: &[f64], n: usize, i: usize, j: usize) -> f64 {
100    let ij = packed_sym_index(i, j);
101    let ii = packed_sym_index(i, i);
102    let jj = packed_sym_index(j, j);
103    let mut sumr = 0.0;
104
105    for r in 1..=n {
106        if i == r || j == r {
107            sumr += PI;
108        } else {
109            let rr = packed_sym_index(r, r);
110            let ir = packed_sym_index(i, r);
111            let rj = packed_sym_index(r, j);
112
113            let num = inprod[ij] - inprod[ir] - inprod[rj] + inprod[rr];
114            let aux1 = (inprod[ii] - 2.0 * inprod[ir] + inprod[rr]).sqrt();
115            let aux2 = (inprod[jj] - 2.0 * inprod[rj] + inprod[rr]).sqrt();
116            let den = aux1 * aux2;
117
118            let mut quo = if den.abs() > 1e-10 { num / den } else { 0.0 };
119            quo = quo.clamp(-1.0, 1.0);
120
121            sumr += (PI - quo.acos()).abs();
122        }
123    }
124
125    sumr
126}
127
/// Compute the Adot matrix used in the PCvM statistic, in packed form.
///
/// # Arguments
/// * `n` - Number of observations
/// * `inprod` - Inner products in 1-based lower-triangular packed order;
///   its length must be n*(n+1)/2
///
/// # Returns
/// Vector of length (n^2 - n + 2)/2: element 0 holds the common diagonal
/// value π*(n+1); the entry for a pair (i, j) with i > j lives at
/// 1 + (i-1)(i-2)/2 + (j-1). Returns an empty vector on invalid input.
pub fn compute_adot(n: usize, inprod: &[f64]) -> Vec<f64> {
    if n == 0 {
        return Vec::new();
    }

    // Packed symmetric storage for an n x n matrix needs n*(n+1)/2 entries.
    let expected_len = (n * n + n) / 2;
    if inprod.len() != expected_len {
        return Vec::new();
    }

    // One slot for the shared diagonal value plus one per strict pair (i > j).
    let out_len = (n * n - n + 2) / 2;
    let mut adot_vec = vec![0.0; out_len];

    adot_vec[0] = PI * (n + 1) as f64;

    // Collect all (i, j) pairs for parallel processing
    let pairs: Vec<(usize, usize)> = (2..=n).flat_map(|i| (1..i).map(move |j| (i, j))).collect();

    // Compute adot values (parallel when feature enabled)
    let results: Vec<(usize, f64)> = iter_maybe_parallel!(pairs)
        .map(|(i, j)| {
            let sumr = adot_pair_sum(inprod, n, i, j);
            // Same packed layout the consumer (pcvm_statistic) indexes with.
            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
            (idx, sumr)
        })
        .collect();

    // Fill in the results
    for (idx, val) in results {
        if idx < adot_vec.len() {
            adot_vec[idx] = val;
        }
    }

    adot_vec
}
164
/// Compute the PCvM statistic.
///
/// `adot_vec` is the packed output of `compute_adot`; `residuals` are the
/// model residuals. Returns 0.0 when either input is empty.
pub fn pcvm_statistic(adot_vec: &[f64], residuals: &[f64]) -> f64 {
    let n = residuals.len();

    if adot_vec.is_empty() || n == 0 {
        return 0.0;
    }

    // Off-diagonal contribution: walk the strict lower triangle (i > j),
    // addressing Adot entries with the same packed-index formula used when
    // the vector was built.
    let mut off_diag = 0.0;
    for i in 2..=n {
        let row_base = 1 + (i - 1) * (i - 2) / 2;
        for j in 1..i {
            let idx = row_base + j - 1;
            if idx < adot_vec.len() {
                off_diag += residuals[i - 1] * adot_vec[idx] * residuals[j - 1];
            }
        }
    }

    // Diagonal contribution: every (i, i) term shares the weight adot_vec[0].
    let sq_norm: f64 = residuals.iter().map(|r| r * r).sum();
    adot_vec[0] * sq_norm + 2.0 * off_diag
}
186
/// Result of random projection statistics.
///
/// Both vectors have one entry per projection (same length, same order),
/// as produced by [`rp_stat`].
#[derive(Debug, Clone, PartialEq)]
pub struct RpStatResult {
    /// CvM statistics for each projection
    pub cvm: Vec<f64>,
    /// KS statistics for each projection
    pub ks: Vec<f64>,
}
195
196/// Compute random projection statistics.
197pub fn rp_stat(proj_x_ord: &[i32], residuals: &[f64], n_proj: usize) -> RpStatResult {
198    let n = residuals.len();
199
200    if n == 0 || n_proj == 0 || proj_x_ord.len() != n * n_proj {
201        return RpStatResult {
202            cvm: Vec::new(),
203            ks: Vec::new(),
204        };
205    }
206
207    // Process projections (parallel when feature enabled)
208    let stats: Vec<(f64, f64)> = iter_maybe_parallel!(0..n_proj)
209        .map(|p| {
210            let mut y = vec![0.0; n];
211            let mut cumsum = 0.0;
212
213            for i in 0..n {
214                let idx = proj_x_ord[p * n + i] as usize;
215                if idx > 0 && idx <= n {
216                    cumsum += residuals[idx - 1];
217                }
218                y[i] = cumsum;
219            }
220
221            let sum_y_sq: f64 = y.iter().map(|yi| yi * yi).sum();
222            let cvm = sum_y_sq / (n * n) as f64;
223
224            let max_abs_y = y.iter().map(|yi| yi.abs()).fold(0.0, f64::max);
225            let ks = max_abs_y / (n as f64).sqrt();
226
227            (cvm, ks)
228        })
229        .collect();
230
231    let cvm_stats: Vec<f64> = stats.iter().map(|(cvm, _)| *cvm).collect();
232    let ks_stats: Vec<f64> = stats.iter().map(|(_, ks)| *ks).collect();
233
234    RpStatResult {
235        cvm: cvm_stats,
236        ks: ks_stats,
237    }
238}
239
240/// k-NN prediction for functional regression.
241///
242/// # Arguments
243/// * `distance_matrix` - Distance matrix (n_test rows x n_train cols)
244/// * `y` - Training response values (length n_train)
245/// * `k` - Number of nearest neighbors
246pub fn knn_predict(distance_matrix: &FdMatrix, y: &[f64], k: usize) -> Vec<f64> {
247    let n_test = distance_matrix.nrows();
248    let n_train = distance_matrix.ncols();
249
250    if n_train == 0 || n_test == 0 || k == 0 || y.len() != n_train {
251        return vec![0.0; n_test];
252    }
253
254    let k = k.min(n_train);
255
256    iter_maybe_parallel!(0..n_test)
257        .map(|i| {
258            // Get distances from test point i to all training points
259            let mut distances: Vec<(usize, f64)> =
260                (0..n_train).map(|j| (j, distance_matrix[(i, j)])).collect();
261
262            // Sort by distance
263            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
264
265            // Average of k nearest neighbors
266            let sum: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum();
267            sum / k as f64
268        })
269        .collect()
270}
271
272/// Compute leave-one-out cross-validation error for k-NN.
273///
274/// # Arguments
275/// * `distance_matrix` - Square distance matrix (n x n)
276/// * `y` - Response values (length n)
277/// * `k` - Number of nearest neighbors
278pub fn knn_loocv(distance_matrix: &FdMatrix, y: &[f64], k: usize) -> f64 {
279    let n = distance_matrix.nrows();
280
281    if n == 0 || k == 0 || y.len() != n || distance_matrix.ncols() != n {
282        return f64::INFINITY;
283    }
284
285    let k = k.min(n - 1);
286
287    let errors: Vec<f64> = iter_maybe_parallel!(0..n)
288        .map(|i| {
289            // Get distances from point i to all other points
290            let mut distances: Vec<(usize, f64)> = (0..n)
291                .filter(|&j| j != i)
292                .map(|j| (j, distance_matrix[(i, j)]))
293                .collect();
294
295            // Sort by distance
296            distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
297
298            // Prediction
299            let pred: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum::<f64>() / k as f64;
300
301            // Squared error
302            (y[i] - pred).powi(2)
303        })
304        .collect();
305
306    errors.iter().sum::<f64>() / n as f64
307}
308
/// Safely convert a non-negative f64 to usize, clamping to `0..=usize::MAX`.
///
/// Returns 0 for negative values and NaN.
#[inline]
pub(crate) fn f64_to_usize_clamped(x: f64) -> usize {
    match x {
        // NaN and anything at or below zero clamp to the bottom of the range.
        v if v.is_nan() || v <= 0.0 => 0,
        // Values at or beyond usize::MAX (including +inf) clamp to the top.
        v if v >= usize::MAX as f64 => usize::MAX,
        // In-range values truncate toward zero.
        v => v as usize,
    }
}
322
#[cfg(test)]
mod tests {
    use super::*;

    // Evenly spaced grid of n points on [0, 1]. Callers pass n >= 2;
    // n == 1 would divide by zero and produce NaN.
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    #[test]
    fn test_integrate_simpson_constant() {
        // Integral of the constant 1 over [0, 1] is exactly 1.
        let argvals = uniform_grid(11);
        let values = vec![1.0; 11];
        let result = integrate_simpson(&values, &argvals);
        assert!((result - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_inner_product_orthogonal() {
        // sin and cos over one full period are orthogonal in L2([0, 1]).
        let argvals = uniform_grid(101);
        let curve1: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();
        let curve2: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).cos()).collect();
        let result = inner_product(&curve1, &curve2, &argvals);
        assert!(result.abs() < 0.01);
    }

    #[test]
    fn test_inner_product_matrix_symmetry() {
        let n = 5;
        let m = 10;
        let argvals = uniform_grid(m);
        let data: Vec<f64> = (0..n * m).map(|i| (i as f64).sin()).collect();
        let mat = FdMatrix::from_column_major(data, n, m).unwrap();

        let matrix = inner_product_matrix(&mat, &argvals);

        for i in 0..n {
            for j in 0..n {
                let diff = (matrix[(i, j)] - matrix[(j, i)]).abs();
                assert!(diff < 1e-10, "Matrix should be symmetric");
            }
        }
    }

    #[test]
    fn test_knn_predict() {
        let n_train = 10;
        let n_test = 3;
        let k = 3;

        // Column-major distance matrix with entry (i, j) = |i - j|.
        let mut distance_data = vec![0.0; n_test * n_train];
        for i in 0..n_test {
            for j in 0..n_train {
                distance_data[i + j * n_test] = ((i as f64) - (j as f64)).abs();
            }
        }
        let distance_matrix = FdMatrix::from_column_major(distance_data, n_test, n_train).unwrap();

        let y: Vec<f64> = (0..n_train).map(|i| i as f64).collect();
        let predictions = knn_predict(&distance_matrix, &y, k);

        assert_eq!(predictions.len(), n_test);
    }

    // ============== compute_adot tests ==============

    #[test]
    fn test_compute_adot_basic() {
        let n = 4;
        // Lower-triangular (row-major) packed inner product: (n*(n+1))/2 = 10 elements
        // Layout: (1,1), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), (4,2), (4,3), (4,4)
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        // Set diagonal to 1.0 (identity-like)
        // idx for (i,i): i*(i-1)/2 + i - 1 = i*(i+1)/2 - 1
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }

        let adot = compute_adot(n, &inprod);

        let expected_len = (n * n - n + 2) / 2;
        assert_eq!(
            adot.len(),
            expected_len,
            "Adot length should be (n^2-n+2)/2"
        );
        assert!(
            (adot[0] - PI * (n + 1) as f64).abs() < 1e-10,
            "First element should be π*(n+1), got {}",
            adot[0]
        );
        for (i, &val) in adot.iter().enumerate() {
            assert!(val.is_finite(), "Adot[{}] should be finite, got {}", i, val);
        }
    }

    #[test]
    fn test_compute_adot_n1() {
        // Smallest valid input: only the shared diagonal slot is produced.
        let n = 1;
        let inprod = vec![1.0]; // (1*(1+1))/2 = 1
        let adot = compute_adot(n, &inprod);

        assert_eq!(adot.len(), 1, "n=1 should give length 1");
        assert!(
            (adot[0] - PI * 2.0).abs() < 1e-10,
            "n=1: first element should be π*2, got {}",
            adot[0]
        );
    }

    #[test]
    fn test_compute_adot_invalid() {
        // n=0
        assert!(compute_adot(0, &[]).is_empty());

        // Wrong inprod length
        assert!(compute_adot(4, &[1.0, 2.0]).is_empty());
    }

    // ============== pcvm_statistic tests ==============

    #[test]
    fn test_pcvm_statistic_basic() {
        let n = 4;
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }
        let adot = compute_adot(n, &inprod);
        let residuals = vec![0.5, -0.3, 0.2, -0.1];

        let stat = pcvm_statistic(&adot, &residuals);

        assert!(stat.is_finite(), "PCvM statistic should be finite");
        assert!(stat >= 0.0, "PCvM statistic should be non-negative");
    }

    #[test]
    fn test_pcvm_statistic_zero_residuals() {
        // All-zero residuals zero out both diagonal and off-diagonal terms.
        let n = 4;
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }
        let adot = compute_adot(n, &inprod);
        let residuals = vec![0.0, 0.0, 0.0, 0.0];

        let stat = pcvm_statistic(&adot, &residuals);
        assert!(
            stat.abs() < 1e-10,
            "PCvM with zero residuals should be ~0, got {}",
            stat
        );
    }

    #[test]
    fn test_pcvm_statistic_empty() {
        assert!(pcvm_statistic(&[], &[]).abs() < 1e-10);
        assert!(pcvm_statistic(&[1.0], &[]).abs() < 1e-10);
    }

    // ============== rp_stat tests ==============

    #[test]
    fn test_rp_stat_basic() {
        let n_proj = 3;
        let residuals = vec![0.5, -0.3, 0.2, -0.1, 0.4];

        // proj_x_ord is n_proj columns of n rows, 1-indexed ranks
        let proj_x_ord: Vec<i32> = vec![
            1, 3, 5, 2, 4, // projection 1
            2, 4, 1, 5, 3, // projection 2
            5, 1, 3, 4, 2, // projection 3
        ];

        let result = rp_stat(&proj_x_ord, &residuals, n_proj);

        assert_eq!(result.cvm.len(), n_proj);
        assert_eq!(result.ks.len(), n_proj);
        for &cvm_val in &result.cvm {
            assert!(cvm_val >= 0.0, "CvM stat should be non-negative");
            assert!(cvm_val.is_finite(), "CvM stat should be finite");
        }
        for &ks_val in &result.ks {
            assert!(ks_val >= 0.0, "KS stat should be non-negative");
            assert!(ks_val.is_finite(), "KS stat should be finite");
        }
    }

    #[test]
    fn test_rp_stat_invalid() {
        let result = rp_stat(&[], &[], 0);
        assert!(result.cvm.is_empty());
        assert!(result.ks.is_empty());

        let result = rp_stat(&[], &[1.0], 0);
        assert!(result.cvm.is_empty());
    }

    // ============== knn_loocv tests ==============

    #[test]
    fn test_knn_loocv_basic() {
        let size = 5;
        let k = 2;
        // Simple distance matrix
        let mut dist_data = vec![0.0; size * size];
        for i in 0..size {
            for j in 0..size {
                dist_data[i + j * size] = ((i as f64) - (j as f64)).abs();
            }
        }
        let dist = FdMatrix::from_column_major(dist_data, size, size).unwrap();
        let y: Vec<f64> = (0..size).map(|i| i as f64 * 2.0).collect();

        let mse = knn_loocv(&dist, &y, k);

        assert!(mse.is_finite(), "k-NN LOOCV MSE should be finite");
        assert!(mse >= 0.0, "k-NN LOOCV MSE should be non-negative");
    }

    #[test]
    fn test_knn_loocv_perfect() {
        // When nearest neighbors have the same y value, MSE should be ~0
        let n = 4;
        let k = 1;
        // Distance matrix where each pair of adjacent points is close
        let mut dist = FdMatrix::from_column_major(vec![100.0; n * n], n, n).unwrap();
        for i in 0..n {
            dist[(i, i)] = 0.0;
        }
        // Make pairs (0,1) and (2,3) very close
        dist[(0, 1)] = 0.1;
        dist[(1, 0)] = 0.1;
        dist[(2, 3)] = 0.1;
        dist[(3, 2)] = 0.1;

        // Same y for paired points
        let y = vec![1.0, 1.0, 5.0, 5.0];
        let mse = knn_loocv(&dist, &y, k);

        assert!(
            mse < 1e-10,
            "k-NN LOOCV MSE should be ~0 for perfectly paired data, got {}",
            mse
        );
    }

    #[test]
    fn test_knn_loocv_invalid() {
        let empty = FdMatrix::zeros(0, 0);
        assert!(knn_loocv(&empty, &[], 1).is_infinite());
        let single = FdMatrix::from_column_major(vec![0.0], 1, 1).unwrap();
        assert!(knn_loocv(&single, &[1.0], 0).is_infinite());
    }
}
580}