1use crate::helpers::simpsons_weights;
4use rayon::prelude::*;
5use std::f64::consts::PI;
6
7pub fn integrate_simpson(values: &[f64], argvals: &[f64]) -> f64 {
13 if values.len() != argvals.len() || values.is_empty() {
14 return 0.0;
15 }
16
17 let weights = simpsons_weights(argvals);
18 values
19 .iter()
20 .zip(weights.iter())
21 .map(|(&v, &w)| v * w)
22 .sum()
23}
24
25pub fn inner_product(curve1: &[f64], curve2: &[f64], argvals: &[f64]) -> f64 {
32 if curve1.len() != curve2.len() || curve1.len() != argvals.len() || curve1.is_empty() {
33 return 0.0;
34 }
35
36 let weights = simpsons_weights(argvals);
37 curve1
38 .iter()
39 .zip(curve2.iter())
40 .zip(weights.iter())
41 .map(|((&c1, &c2), &w)| c1 * c2 * w)
42 .sum()
43}
44
45pub fn inner_product_matrix(data: &[f64], n: usize, m: usize, argvals: &[f64]) -> Vec<f64> {
56 if n == 0 || m == 0 || argvals.len() != m || data.len() != n * m {
57 return Vec::new();
58 }
59
60 let weights = simpsons_weights(argvals);
61
62 let upper_triangle: Vec<(usize, usize, f64)> = (0..n)
64 .into_par_iter()
65 .flat_map(|i| {
66 (i..n)
67 .map(|j| {
68 let mut ip = 0.0;
69 for k in 0..m {
70 ip += data[i + k * n] * data[j + k * n] * weights[k];
71 }
72 (i, j, ip)
73 })
74 .collect::<Vec<_>>()
75 })
76 .collect();
77
78 let mut result = vec![0.0; n * n];
80 for (i, j, ip) in upper_triangle {
81 result[i + j * n] = ip;
82 result[j + i * n] = ip;
83 }
84
85 result
86}
87
/// Computes the packed "A-dot" vector used by the projected Cramér–von Mises
/// statistic from a packed lower-triangular matrix of inner products.
///
/// `inprod` must hold the lower triangle (diagonal included) of an n×n
/// inner-product matrix in packed form — exactly (n² + n)/2 entries, with the
/// 1-based element (i, j), i >= j, stored at index i(i-1)/2 + j - 1.
///
/// Returns a vector of length (n² - n + 2)/2: slot 0 holds the common
/// diagonal value π(n + 1); slot 1 + (i-1)(i-2)/2 + (j - 1) holds the value
/// for the 1-based pair (i, j) with i > j. Returns an empty vector when `n`
/// is 0 or `inprod` has the wrong length.
pub fn compute_adot(n: usize, inprod: &[f64]) -> Vec<f64> {
    if n == 0 {
        return Vec::new();
    }

    // Packed lower triangle of an n×n symmetric matrix: (n² + n)/2 entries.
    let expected_len = (n * n + n) / 2;
    if inprod.len() != expected_len {
        return Vec::new();
    }

    // One slot for the shared diagonal value plus one per strict
    // lower-triangle pair: 1 + n(n-1)/2 = (n² - n + 2)/2.
    let out_len = (n * n - n + 2) / 2;
    let mut adot_vec = vec![0.0; out_len];

    // All diagonal entries equal π(n + 1); stored once at slot 0.
    adot_vec[0] = PI * (n + 1) as f64;

    // Enumerate every 1-based pair (i, j) with i > j, row by row.
    let pairs: Vec<(usize, usize)> = (2..=n).flat_map(|i| (1..i).map(move |j| (i, j))).collect();

    let results: Vec<(usize, f64)> = pairs
        .into_par_iter()
        .map(|(i, j)| {
            let mut sumr = 0.0;

            for r in 1..=n {
                if i == r || j == r {
                    // Degenerate case: when r coincides with i or j the
                    // angle contribution collapses to π.
                    sumr += PI;
                } else {
                    // Row offsets of i, j, r in the packed lower triangle.
                    let auxi = i * (i - 1) / 2;
                    let auxj = j * (j - 1) / 2;
                    let auxr = r * (r - 1) / 2;

                    // Packed indices of (i,j), (i,i), (j,j), (r,r).
                    let ij = auxi + j - 1;
                    let ii = auxi + i - 1;
                    let jj = auxj + j - 1;
                    let rr = auxr + r - 1;

                    // For (i,r) and (r,j), order the indices so the larger
                    // comes first — only the lower triangle is stored.
                    let ir = if i > r { auxi + r - 1 } else { auxr + i - 1 };
                    let rj = if r > j { auxr + j - 1 } else { auxj + r - 1 };
                    let jr = rj;

                    // Cosine of the angle between (x_i - x_r) and
                    // (x_j - x_r), expressed via pairwise inner products.
                    let num = inprod[ij] - inprod[ir] - inprod[rj] + inprod[rr];
                    let aux1 = (inprod[ii] - 2.0 * inprod[ir] + inprod[rr]).sqrt();
                    let aux2 = (inprod[jj] - 2.0 * inprod[jr] + inprod[rr]).sqrt();
                    let den = aux1 * aux2;

                    // Guard against (near-)zero norms, then clamp into
                    // acos's domain to absorb floating-point noise.
                    let mut quo = if den.abs() > 1e-10 { num / den } else { 0.0 };
                    quo = quo.clamp(-1.0, 1.0);

                    sumr += (PI - quo.acos()).abs();
                }
            }

            // Destination slot for the pair (i, j) in the packed output.
            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
            (idx, sumr)
        })
        .collect();

    // Scatter the parallel results into the packed output vector.
    for (idx, val) in results {
        if idx < adot_vec.len() {
            adot_vec[idx] = val;
        }
    }

    adot_vec
}
156
/// Evaluates the projected Cramér–von Mises statistic from a packed A-dot
/// vector (as produced by `compute_adot`) and a vector of residuals.
///
/// `adot_vec[0]` is the common diagonal value; the entry for the 1-based
/// pair (i, j), i > j, lives at slot 1 + (i-1)(i-2)/2 + (j - 1).
/// Returns 0.0 when either input is empty.
pub fn pcvm_statistic(adot_vec: &[f64], residuals: &[f64]) -> f64 {
    let n = residuals.len();

    if n == 0 || adot_vec.is_empty() {
        return 0.0;
    }

    // Off-diagonal contribution: sum over strict lower-triangle pairs
    // (i > j, 1-based); out-of-range packed indices are skipped.
    let mut off_diag = 0.0;
    for i in 2..=n {
        let row_base = 1 + (i - 1) * (i - 2) / 2;
        for j in 1..i {
            let idx = row_base + j - 1;
            if let Some(&a) = adot_vec.get(idx) {
                off_diag += residuals[i - 1] * a * residuals[j - 1];
            }
        }
    }

    // Diagonal contribution uses the shared value in slot 0; each
    // off-diagonal pair counts twice by symmetry.
    let diag: f64 = residuals.iter().map(|r| r * r).sum();
    adot_vec[0] * diag + 2.0 * off_diag
}
178
/// Per-projection goodness-of-fit statistics returned by `rp_stat`.
pub struct RpStatResult {
    // Cramér–von Mises statistic, one entry per projection.
    pub cvm: Vec<f64>,
    // Kolmogorov–Smirnov statistic, one entry per projection.
    pub ks: Vec<f64>,
}
186
187pub fn rp_stat(proj_x_ord: &[i32], residuals: &[f64], n_proj: usize) -> RpStatResult {
189 let n = residuals.len();
190
191 if n == 0 || n_proj == 0 || proj_x_ord.len() != n * n_proj {
192 return RpStatResult {
193 cvm: Vec::new(),
194 ks: Vec::new(),
195 };
196 }
197
198 let stats: Vec<(f64, f64)> = (0..n_proj)
200 .into_par_iter()
201 .map(|p| {
202 let mut y = vec![0.0; n];
203 let mut cumsum = 0.0;
204
205 for i in 0..n {
206 let idx = proj_x_ord[p * n + i] as usize;
207 if idx > 0 && idx <= n {
208 cumsum += residuals[idx - 1];
209 }
210 y[i] = cumsum;
211 }
212
213 let sum_y_sq: f64 = y.iter().map(|yi| yi * yi).sum();
214 let cvm = sum_y_sq / (n * n) as f64;
215
216 let max_abs_y = y.iter().map(|yi| yi.abs()).fold(0.0, f64::max);
217 let ks = max_abs_y / (n as f64).sqrt();
218
219 (cvm, ks)
220 })
221 .collect();
222
223 let cvm_stats: Vec<f64> = stats.iter().map(|(cvm, _)| *cvm).collect();
224 let ks_stats: Vec<f64> = stats.iter().map(|(_, ks)| *ks).collect();
225
226 RpStatResult {
227 cvm: cvm_stats,
228 ks: ks_stats,
229 }
230}
231
232pub fn knn_predict(
234 distance_matrix: &[f64],
235 y: &[f64],
236 n_train: usize,
237 n_test: usize,
238 k: usize,
239) -> Vec<f64> {
240 if n_train == 0 || n_test == 0 || k == 0 || y.len() != n_train {
241 return vec![0.0; n_test];
242 }
243
244 let k = k.min(n_train);
245
246 (0..n_test)
247 .into_par_iter()
248 .map(|i| {
249 let mut distances: Vec<(usize, f64)> = (0..n_train)
251 .map(|j| (j, distance_matrix[i + j * n_test]))
252 .collect();
253
254 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
256
257 let sum: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum();
259 sum / k as f64
260 })
261 .collect()
262}
263
264pub fn knn_loocv(distance_matrix: &[f64], y: &[f64], n: usize, k: usize) -> f64 {
266 if n == 0 || k == 0 || y.len() != n || distance_matrix.len() != n * n {
267 return f64::INFINITY;
268 }
269
270 let k = k.min(n - 1);
271
272 let errors: Vec<f64> = (0..n)
273 .into_par_iter()
274 .map(|i| {
275 let mut distances: Vec<(usize, f64)> = (0..n)
277 .filter(|&j| j != i)
278 .map(|j| (j, distance_matrix[i + j * n]))
279 .collect();
280
281 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
283
284 let pred: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum::<f64>() / k as f64;
286
287 (y[i] - pred).powi(2)
289 })
290 .collect();
291
292 errors.iter().sum::<f64>() / n as f64
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    // Test helper: n evenly spaced grid points on [0, 1].
    // NOTE(review): divides by n - 1, so n must be >= 2 — every caller
    // below passes n >= 21.
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    // Integrating the constant 2 over [0, 1] should give 2.
    #[test]
    fn test_integrate_simpson_constant() {
        let argvals = uniform_grid(21);
        let values: Vec<f64> = vec![2.0; 21];

        let result = integrate_simpson(&values, &argvals);
        assert!(
            (result - 2.0).abs() < 0.01,
            "Integral of constant 2 should be 2, got {}",
            result
        );
    }

    // Integrating f(x) = x over [0, 1] should give 1/2.
    #[test]
    fn test_integrate_simpson_linear() {
        let argvals = uniform_grid(51);
        let values: Vec<f64> = argvals.clone();

        let result = integrate_simpson(&values, &argvals);
        assert!(
            (result - 0.5).abs() < 0.01,
            "Integral of x should be 0.5, got {}",
            result
        );
    }

    // Empty or mismatched inputs must yield 0.
    #[test]
    fn test_integrate_simpson_invalid() {
        assert!(integrate_simpson(&[], &[]).abs() < 1e-10);
        assert!(integrate_simpson(&[1.0, 2.0], &[0.0]).abs() < 1e-10);
    }

    // sin(2πt) and sin(4πt) are orthogonal on [0, 1].
    #[test]
    fn test_inner_product_orthogonal() {
        let argvals = uniform_grid(101);
        let curve1: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();
        let curve2: Vec<f64> = argvals.iter().map(|&t| (4.0 * PI * t).sin()).collect();

        let ip = inner_product(&curve1, &curve2, &argvals);
        assert!(
            ip.abs() < 0.05,
            "Orthogonal functions should have near-zero inner product, got {}",
            ip
        );
    }

    // ⟨sin(2πt), sin(2πt)⟩ over [0, 1] is 1/2.
    #[test]
    fn test_inner_product_self_positive() {
        let argvals = uniform_grid(51);
        let curve: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();

        let ip = inner_product(&curve, &curve, &argvals);
        assert!(ip > 0.0, "Self inner product should be positive");
        assert!((ip - 0.5).abs() < 0.05, "Expected ~0.5, got {}", ip);
    }

    // Empty or mismatched inputs must yield 0.
    #[test]
    fn test_inner_product_invalid() {
        assert!(inner_product(&[], &[], &[]).abs() < 1e-10);
        assert!(inner_product(&[1.0], &[1.0, 2.0], &[0.0]).abs() < 1e-10);
    }

    // The Gram matrix of inner products must be symmetric.
    #[test]
    fn test_inner_product_matrix_symmetric() {
        let n = 5;
        let m = 21;
        let argvals = uniform_grid(m);

        // Column-major data: curve i is (i + 1) * t sampled on the grid.
        let mut data = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                data[i + j * n] = (i as f64 + 1.0) * argvals[j];
            }
        }

        let matrix = inner_product_matrix(&data, n, m, &argvals);
        assert_eq!(matrix.len(), n * n);

        for i in 0..n {
            for j in 0..n {
                assert!(
                    (matrix[i + j * n] - matrix[j + i * n]).abs() < 1e-10,
                    "Matrix should be symmetric"
                );
            }
        }
    }

    // Diagonal entries are squared norms, hence positive for nonzero curves.
    #[test]
    fn test_inner_product_matrix_diagonal_positive() {
        let n = 3;
        let m = 21;
        let argvals = uniform_grid(m);

        let mut data = vec![0.0; n * m];
        for i in 0..n {
            for j in 0..m {
                data[i + j * n] = (i as f64 + 1.0) * argvals[j];
            }
        }

        let matrix = inner_product_matrix(&data, n, m, &argvals);

        for i in 0..n {
            assert!(matrix[i + i * n] > 0.0, "Diagonal should be positive");
        }
    }

    // Degenerate dimensions must produce an empty result.
    #[test]
    fn test_inner_product_matrix_invalid() {
        let result = inner_product_matrix(&[], 0, 0, &[]);
        assert!(result.is_empty());
    }

    // All-zero residuals zero out both terms of the statistic.
    #[test]
    fn test_pcvm_statistic_zero_residuals() {
        let n = 5;
        let inprod_len = (n * n + n) / 2;
        let inprod: Vec<f64> = (0..inprod_len).map(|i| i as f64 * 0.1).collect();
        let adot = compute_adot(n, &inprod);

        let residuals = vec![0.0; n];
        let stat = pcvm_statistic(&adot, &residuals);

        assert!(
            stat.abs() < 1e-10,
            "Zero residuals should give zero statistic"
        );
    }

    // Smoke test: mixed-sign residuals still yield a finite statistic.
    #[test]
    fn test_pcvm_statistic_positive() {
        let n = 5;
        let inprod_len = (n * n + n) / 2;
        let inprod: Vec<f64> = (0..inprod_len).map(|i| (i as f64 * 0.1).max(0.1)).collect();
        let adot = compute_adot(n, &inprod);

        let residuals = vec![1.0, -0.5, 0.3, -0.2, 0.4];
        let stat = pcvm_statistic(&adot, &residuals);

        assert!(stat.is_finite(), "Statistic should be finite");
    }

    // k = 1 reduces to nearest-neighbor lookup.
    #[test]
    fn test_knn_predict_k1() {
        let n_train = 3;
        let n_test = 2;
        let y = vec![1.0, 2.0, 3.0];

        // Column-major n_test × n_train: entry (i, j) at i + j * n_test.
        // Test 0 sees distances (0.1, 0.5, 0.8); test 1 sees (0.9, 0.2, 0.1).
        let distance_matrix = vec![
            0.1, 0.9, 0.5, 0.2, 0.8, 0.1, ];

        let predictions = knn_predict(&distance_matrix, &y, n_train, n_test, 1);

        assert_eq!(predictions.len(), 2);
        assert!(
            (predictions[0] - 1.0).abs() < 1e-10,
            "Test 0 nearest to train 0"
        );
        assert!(
            (predictions[1] - 3.0).abs() < 1e-10,
            "Test 1 nearest to train 2"
        );
    }

    // k = n_train averages over every training response.
    #[test]
    fn test_knn_predict_k_all() {
        let n_train = 4;
        let n_test = 1;
        let y = vec![1.0, 2.0, 3.0, 4.0];
        let expected_mean = 2.5;

        let distance_matrix = vec![0.1, 0.2, 0.3, 0.4];
        let predictions = knn_predict(&distance_matrix, &y, n_train, n_test, n_train);

        assert!(
            (predictions[0] - expected_mean).abs() < 1e-10,
            "k=n should return mean"
        );
    }

    // Invalid sizes fall back to a zero-filled prediction vector.
    #[test]
    fn test_knn_predict_invalid() {
        let result = knn_predict(&[], &[], 0, 1, 1);
        assert_eq!(result, vec![0.0]);
    }

    // LOOCV on a well-formed distance matrix yields a finite, non-negative MSE.
    #[test]
    fn test_knn_loocv_returns_finite() {
        let n = 5;
        let y = vec![1.0, 2.0, 1.5, 2.5, 1.8];

        // Symmetric |i - j| distances, stored column-major.
        let mut distance_matrix = vec![0.0; n * n];
        for i in 0..n {
            for j in 0..n {
                let dist = ((i as f64) - (j as f64)).abs();
                distance_matrix[i + j * n] = dist;
            }
        }

        let error = knn_loocv(&distance_matrix, &y, n, 2);

        assert!(error.is_finite(), "LOOCV error should be finite");
        assert!(error >= 0.0, "LOOCV error should be non-negative");
    }

    // Invalid sizes and k = 0 are signalled with infinity.
    #[test]
    fn test_knn_loocv_invalid() {
        let result = knn_loocv(&[], &[], 0, 1);
        assert!(result.is_infinite());

        let result = knn_loocv(&[0.0; 4], &[1.0, 2.0], 2, 0);
        assert!(result.is_infinite());
    }
}