1use crate::helpers::simpsons_weights;
4use crate::iter_maybe_parallel;
5use crate::matrix::FdMatrix;
6#[cfg(feature = "parallel")]
7use rayon::iter::ParallelIterator;
8use std::f64::consts::PI;
9
10pub fn integrate_simpson(values: &[f64], argvals: &[f64]) -> f64 {
16 if values.len() != argvals.len() || values.is_empty() {
17 return 0.0;
18 }
19
20 let weights = simpsons_weights(argvals);
21 values
22 .iter()
23 .zip(weights.iter())
24 .map(|(&v, &w)| v * w)
25 .sum()
26}
27
28pub fn inner_product(curve1: &[f64], curve2: &[f64], argvals: &[f64]) -> f64 {
35 if curve1.len() != curve2.len() || curve1.len() != argvals.len() || curve1.is_empty() {
36 return 0.0;
37 }
38
39 let weights = simpsons_weights(argvals);
40 curve1
41 .iter()
42 .zip(curve2.iter())
43 .zip(weights.iter())
44 .map(|((&c1, &c2), &w)| c1 * c2 * w)
45 .sum()
46}
47
48pub fn inner_product_matrix(data: &FdMatrix, argvals: &[f64]) -> FdMatrix {
57 let n = data.nrows();
58 let m = data.ncols();
59
60 if n == 0 || m == 0 || argvals.len() != m {
61 return FdMatrix::zeros(0, 0);
62 }
63
64 let weights = simpsons_weights(argvals);
65
66 let upper_triangle: Vec<(usize, usize, f64)> = iter_maybe_parallel!(0..n)
68 .flat_map(|i| {
69 (i..n)
70 .map(|j| {
71 let mut ip = 0.0;
72 for k in 0..m {
73 ip += data[(i, k)] * data[(j, k)] * weights[k];
74 }
75 (i, j, ip)
76 })
77 .collect::<Vec<_>>()
78 })
79 .collect();
80
81 let mut result = FdMatrix::zeros(n, n);
83 for (i, j, ip) in upper_triangle {
84 result[(i, j)] = ip;
85 result[(j, i)] = ip;
86 }
87
88 result
89}
90
/// Index into the packed lower-triangular symmetric storage for the
/// 1-based pair `(a, b)`; argument order does not matter.
///
/// Both arguments must be >= 1 — an all-zero pair would underflow `hi - 1`.
fn packed_sym_index(a: usize, b: usize) -> usize {
    let hi = a.max(b);
    let lo = a.min(b);
    hi * (hi - 1) / 2 + lo - 1
}
97
/// One off-diagonal term of the PCvM "Adot" matrix for the 1-based pair
/// (i, j): sums, over every sample index r, a quantity derived from the
/// cosine between the differences (X_i - X_r) and (X_j - X_r), all read
/// from the packed inner-product vector `inprod` (indexed via
/// `packed_sym_index`). NOTE(review): appears to follow the projected
/// Cramér–von Mises Adot construction — confirm against the reference
/// implementation.
fn adot_pair_sum(inprod: &[f64], n: usize, i: usize, j: usize) -> f64 {
    let ij = packed_sym_index(i, j);
    let ii = packed_sym_index(i, i);
    let jj = packed_sym_index(j, j);
    let mut sumr = 0.0;

    for r in 1..=n {
        if i == r || j == r {
            // Degenerate viewpoint (r coincides with i or j): contributes π.
            sumr += PI;
        } else {
            let rr = packed_sym_index(r, r);
            let ir = packed_sym_index(i, r);
            let rj = packed_sym_index(r, j);

            // num = <X_i - X_r, X_j - X_r>; aux1/aux2 = ‖X_i - X_r‖, ‖X_j - X_r‖,
            // expanded in terms of the stored inner products.
            let num = inprod[ij] - inprod[ir] - inprod[rj] + inprod[rr];
            let aux1 = (inprod[ii] - 2.0 * inprod[ir] + inprod[rr]).sqrt();
            let aux2 = (inprod[jj] - 2.0 * inprod[rj] + inprod[rr]).sqrt();
            let den = aux1 * aux2;

            // Guard a (near-)zero denominator, then clamp so rounding
            // overshoot can never push the cosine outside acos's domain.
            let mut quo = if den.abs() > 1e-10 { num / den } else { 0.0 };
            quo = quo.clamp(-1.0, 1.0);

            sumr += (PI - quo.acos()).abs();
        }
    }

    sumr
}
127
/// Builds the packed Adot vector for the PCvM statistic from the packed
/// inner-product vector `inprod` (expected length n(n+1)/2, laid out per
/// `packed_sym_index`).
///
/// Output layout (length (n² - n + 2)/2): slot 0 holds the common diagonal
/// value π·(n + 1); slot 1 + (i-1)(i-2)/2 + (j-1) holds the pair (i, j)
/// with i > j >= 1. Returns an empty vector when `n == 0` or `inprod` has
/// the wrong length.
pub fn compute_adot(n: usize, inprod: &[f64]) -> Vec<f64> {
    if n == 0 {
        return Vec::new();
    }

    // Packed symmetric storage must hold exactly n(n+1)/2 entries.
    let expected_len = (n * n + n) / 2;
    if inprod.len() != expected_len {
        return Vec::new();
    }

    // One slot for the shared diagonal value plus one per (i, j) pair.
    let out_len = (n * n - n + 2) / 2;
    let mut adot_vec = vec![0.0; out_len];

    // Every diagonal Adot entry equals π·(n + 1); stored once at slot 0.
    adot_vec[0] = PI * (n + 1) as f64;

    // All strictly-lower-triangle pairs (i, j), 1-based, i > j.
    let pairs: Vec<(usize, usize)> = (2..=n).flat_map(|i| (1..i).map(move |j| (i, j))).collect();

    let results: Vec<(usize, f64)> = iter_maybe_parallel!(pairs)
        .map(|(i, j)| {
            let sumr = adot_pair_sum(inprod, n, i, j);
            // Packed destination slot for this pair (see layout above).
            let idx = 1 + ((i - 1) * (i - 2) / 2) + j - 1;
            (idx, sumr)
        })
        .collect();

    // Sequential scatter; the bound check is defensive — indices fit by
    // construction (max index is n(n-1)/2 = out_len - 1).
    for (idx, val) in results {
        if idx < adot_vec.len() {
            adot_vec[idx] = val;
        }
    }

    adot_vec
}
164
/// Projected Cramér–von Mises statistic: `adot_vec[0] · Σ eᵢ²` plus twice
/// the Adot-weighted sum over all residual pairs i > j, with `adot_vec` in
/// the packed layout produced by `compute_adot`.
///
/// Returns `0.0` when either input is empty.
pub fn pcvm_statistic(adot_vec: &[f64], residuals: &[f64]) -> f64 {
    let n = residuals.len();

    if n == 0 || adot_vec.is_empty() {
        return 0.0;
    }

    // Off-diagonal contribution: residual pairs weighted by packed Adot.
    let mut off_diag = 0.0;
    for i in 2..=n {
        let row_base = 1 + (i - 1) * (i - 2) / 2;
        for j in 1..i {
            if let Some(&a) = adot_vec.get(row_base + j - 1) {
                off_diag += residuals[i - 1] * a * residuals[j - 1];
            }
        }
    }

    // Diagonal contribution: the shared Adot value times Σ eᵢ².
    let diag: f64 = residuals.iter().map(|r| r * r).sum();
    adot_vec[0] * diag + 2.0 * off_diag
}
186
/// Per-projection test statistics produced by `rp_stat`: one Cramér–von
/// Mises and one Kolmogorov–Smirnov value for each random projection.
#[derive(Debug, Clone, PartialEq)]
pub struct RpStatResult {
    // Cramér–von Mises statistic for each projection.
    pub cvm: Vec<f64>,
    // Kolmogorov–Smirnov statistic for each projection.
    pub ks: Vec<f64>,
}
195
196pub fn rp_stat(proj_x_ord: &[i32], residuals: &[f64], n_proj: usize) -> RpStatResult {
198 let n = residuals.len();
199
200 if n == 0 || n_proj == 0 || proj_x_ord.len() != n * n_proj {
201 return RpStatResult {
202 cvm: Vec::new(),
203 ks: Vec::new(),
204 };
205 }
206
207 let stats: Vec<(f64, f64)> = iter_maybe_parallel!(0..n_proj)
209 .map(|p| {
210 let mut y = vec![0.0; n];
211 let mut cumsum = 0.0;
212
213 for i in 0..n {
214 let idx = proj_x_ord[p * n + i] as usize;
215 if idx > 0 && idx <= n {
216 cumsum += residuals[idx - 1];
217 }
218 y[i] = cumsum;
219 }
220
221 let sum_y_sq: f64 = y.iter().map(|yi| yi * yi).sum();
222 let cvm = sum_y_sq / (n * n) as f64;
223
224 let max_abs_y = y.iter().map(|yi| yi.abs()).fold(0.0, f64::max);
225 let ks = max_abs_y / (n as f64).sqrt();
226
227 (cvm, ks)
228 })
229 .collect();
230
231 let cvm_stats: Vec<f64> = stats.iter().map(|(cvm, _)| *cvm).collect();
232 let ks_stats: Vec<f64> = stats.iter().map(|(_, ks)| *ks).collect();
233
234 RpStatResult {
235 cvm: cvm_stats,
236 ks: ks_stats,
237 }
238}
239
240pub fn knn_predict(distance_matrix: &FdMatrix, y: &[f64], k: usize) -> Vec<f64> {
247 let n_test = distance_matrix.nrows();
248 let n_train = distance_matrix.ncols();
249
250 if n_train == 0 || n_test == 0 || k == 0 || y.len() != n_train {
251 return vec![0.0; n_test];
252 }
253
254 let k = k.min(n_train);
255
256 iter_maybe_parallel!(0..n_test)
257 .map(|i| {
258 let mut distances: Vec<(usize, f64)> =
260 (0..n_train).map(|j| (j, distance_matrix[(i, j)])).collect();
261
262 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
264
265 let sum: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum();
267 sum / k as f64
268 })
269 .collect()
270}
271
272pub fn knn_loocv(distance_matrix: &FdMatrix, y: &[f64], k: usize) -> f64 {
279 let n = distance_matrix.nrows();
280
281 if n == 0 || k == 0 || y.len() != n || distance_matrix.ncols() != n {
282 return f64::INFINITY;
283 }
284
285 let k = k.min(n - 1);
286
287 let errors: Vec<f64> = iter_maybe_parallel!(0..n)
288 .map(|i| {
289 let mut distances: Vec<(usize, f64)> = (0..n)
291 .filter(|&j| j != i)
292 .map(|j| (j, distance_matrix[(i, j)]))
293 .collect();
294
295 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
297
298 let pred: f64 = distances.iter().take(k).map(|(j, _)| y[*j]).sum::<f64>() / k as f64;
300
301 (y[i] - pred).powi(2)
303 })
304 .collect();
305
306 errors.iter().sum::<f64>() / n as f64
307}
308
/// Converts an `f64` to `usize`, clamping to `[0, usize::MAX]`; NaN and
/// non-positive values map to 0, anything at or above `usize::MAX` (as
/// f64) saturates to `usize::MAX`.
#[inline]
pub(crate) fn f64_to_usize_clamped(x: f64) -> usize {
    match x {
        v if v.is_nan() || v <= 0.0 => 0,
        v if v >= usize::MAX as f64 => usize::MAX,
        v => v as usize,
    }
}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// Evenly spaced grid of `n` points spanning [0, 1].
    fn uniform_grid(n: usize) -> Vec<f64> {
        (0..n).map(|i| i as f64 / (n - 1) as f64).collect()
    }

    #[test]
    fn test_integrate_simpson_constant() {
        // ∫₀¹ 1 dt = 1, and Simpson's rule is exact for constants.
        let argvals = uniform_grid(11);
        let values = vec![1.0; 11];
        let result = integrate_simpson(&values, &argvals);
        assert!((result - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_inner_product_orthogonal() {
        // sin(2πt) and cos(2πt) are orthogonal on [0, 1].
        let argvals = uniform_grid(101);
        let curve1: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).sin()).collect();
        let curve2: Vec<f64> = argvals.iter().map(|&t| (2.0 * PI * t).cos()).collect();
        let result = inner_product(&curve1, &curve2, &argvals);
        assert!(result.abs() < 0.01);
    }

    #[test]
    fn test_inner_product_matrix_symmetry() {
        let n = 5;
        let m = 10;
        let argvals = uniform_grid(m);
        let data: Vec<f64> = (0..n * m).map(|i| (i as f64).sin()).collect();
        let mat = FdMatrix::from_column_major(data, n, m).unwrap();

        let matrix = inner_product_matrix(&mat, &argvals);

        for i in 0..n {
            for j in 0..n {
                let diff = (matrix[(i, j)] - matrix[(j, i)]).abs();
                assert!(diff < 1e-10, "Matrix should be symmetric");
            }
        }
    }

    #[test]
    fn test_knn_predict() {
        let n_train = 10;
        let n_test = 3;
        let k = 3;

        // Column-major distance matrix with d(i, j) = |i - j|.
        let mut distance_data = vec![0.0; n_test * n_train];
        for i in 0..n_test {
            for j in 0..n_train {
                distance_data[i + j * n_test] = ((i as f64) - (j as f64)).abs();
            }
        }
        let distance_matrix = FdMatrix::from_column_major(distance_data, n_test, n_train).unwrap();

        let y: Vec<f64> = (0..n_train).map(|i| i as f64).collect();
        let predictions = knn_predict(&distance_matrix, &y, k);

        assert_eq!(predictions.len(), n_test);
        // Test point 0's three nearest training points are 0, 1, 2 → mean 1.0.
        assert!((predictions[0] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_compute_adot_basic() {
        let n = 4;
        // Packed inner-product matrix of orthonormal curves: the identity.
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }

        let adot = compute_adot(n, &inprod);

        let expected_len = (n * n - n + 2) / 2;
        assert_eq!(
            adot.len(),
            expected_len,
            "Adot length should be (n^2-n+2)/2"
        );
        assert!(
            (adot[0] - PI * (n + 1) as f64).abs() < 1e-10,
            "First element should be π*(n+1), got {}",
            adot[0]
        );
        for (i, &val) in adot.iter().enumerate() {
            assert!(val.is_finite(), "Adot[{}] should be finite, got {}", i, val);
        }
    }

    #[test]
    fn test_compute_adot_n1() {
        let n = 1;
        let inprod = vec![1.0];
        let adot = compute_adot(n, &inprod);

        assert_eq!(adot.len(), 1, "n=1 should give length 1");
        assert!(
            (adot[0] - PI * 2.0).abs() < 1e-10,
            "n=1: first element should be π*2, got {}",
            adot[0]
        );
    }

    #[test]
    fn test_compute_adot_invalid() {
        // n = 0 and a mismatched inprod length both yield an empty result.
        assert!(compute_adot(0, &[]).is_empty());
        assert!(compute_adot(4, &[1.0, 2.0]).is_empty());
    }

    #[test]
    fn test_pcvm_statistic_basic() {
        let n = 4;
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }
        let adot = compute_adot(n, &inprod);
        let residuals = vec![0.5, -0.3, 0.2, -0.1];

        let stat = pcvm_statistic(&adot, &residuals);

        assert!(stat.is_finite(), "PCvM statistic should be finite");
        assert!(stat >= 0.0, "PCvM statistic should be non-negative");
    }

    #[test]
    fn test_pcvm_statistic_zero_residuals() {
        let n = 4;
        let mut inprod = vec![0.0; (n * (n + 1)) / 2];
        for i in 1..=n {
            let idx = i * (i - 1) / 2 + i - 1;
            inprod[idx] = 1.0;
        }
        let adot = compute_adot(n, &inprod);
        let residuals = vec![0.0, 0.0, 0.0, 0.0];

        let stat = pcvm_statistic(&adot, &residuals);
        assert!(
            stat.abs() < 1e-10,
            "PCvM with zero residuals should be ~0, got {}",
            stat
        );
    }

    #[test]
    fn test_pcvm_statistic_empty() {
        assert!(pcvm_statistic(&[], &[]).abs() < 1e-10);
        assert!(pcvm_statistic(&[1.0], &[]).abs() < 1e-10);
    }

    #[test]
    fn test_rp_stat_basic() {
        let n_proj = 3;
        let residuals = vec![0.5, -0.3, 0.2, -0.1, 0.4];

        // Three projections, each a permutation of the 1-based ranks 1..=5.
        let proj_x_ord: Vec<i32> = vec![
            1, 3, 5, 2, 4, //
            2, 4, 1, 5, 3, //
            5, 1, 3, 4, 2, //
        ];

        let result = rp_stat(&proj_x_ord, &residuals, n_proj);

        assert_eq!(result.cvm.len(), n_proj);
        assert_eq!(result.ks.len(), n_proj);
        for &cvm_val in &result.cvm {
            assert!(cvm_val >= 0.0, "CvM stat should be non-negative");
            assert!(cvm_val.is_finite(), "CvM stat should be finite");
        }
        for &ks_val in &result.ks {
            assert!(ks_val >= 0.0, "KS stat should be non-negative");
            assert!(ks_val.is_finite(), "KS stat should be finite");
        }
    }

    #[test]
    fn test_rp_stat_invalid() {
        let result = rp_stat(&[], &[], 0);
        assert!(result.cvm.is_empty());
        assert!(result.ks.is_empty());

        let result = rp_stat(&[], &[1.0], 0);
        assert!(result.cvm.is_empty());
    }

    #[test]
    fn test_knn_loocv_basic() {
        let size = 5;
        let k = 2;
        // Column-major distance matrix with d(i, j) = |i - j|.
        let mut dist_data = vec![0.0; size * size];
        for i in 0..size {
            for j in 0..size {
                dist_data[i + j * size] = ((i as f64) - (j as f64)).abs();
            }
        }
        let dist = FdMatrix::from_column_major(dist_data, size, size).unwrap();
        let y: Vec<f64> = (0..size).map(|i| i as f64 * 2.0).collect();

        let mse = knn_loocv(&dist, &y, k);

        assert!(mse.is_finite(), "k-NN LOOCV MSE should be finite");
        assert!(mse >= 0.0, "k-NN LOOCV MSE should be non-negative");
    }

    #[test]
    fn test_knn_loocv_perfect() {
        // Points paired (0,1) and (2,3) with identical responses: the single
        // nearest neighbor predicts each held-out point exactly.
        let n = 4;
        let k = 1;
        let mut dist = FdMatrix::from_column_major(vec![100.0; n * n], n, n).unwrap();
        for i in 0..n {
            dist[(i, i)] = 0.0;
        }
        dist[(0, 1)] = 0.1;
        dist[(1, 0)] = 0.1;
        dist[(2, 3)] = 0.1;
        dist[(3, 2)] = 0.1;

        let y = vec![1.0, 1.0, 5.0, 5.0];
        let mse = knn_loocv(&dist, &y, k);

        assert!(
            mse < 1e-10,
            "k-NN LOOCV MSE should be ~0 for perfectly paired data, got {}",
            mse
        );
    }

    #[test]
    fn test_knn_loocv_invalid() {
        let empty = FdMatrix::zeros(0, 0);
        assert!(knn_loocv(&empty, &[], 1).is_infinite());
        let single = FdMatrix::from_column_major(vec![0.0], 1, 1).unwrap();
        assert!(knn_loocv(&single, &[1.0], 0).is_infinite());
    }
}