1use crate::accumulator::BinnedAccumulatorF64;
15use cjc_repro::KahanAccumulatorF64;
16
17fn binned_mean(data: &[f64]) -> f64 {
23 let mut acc = BinnedAccumulatorF64::new();
24 for &x in data {
25 acc.add(x);
26 }
27 acc.finalize() / data.len() as f64
28}
29
/// Returns an owned ascending copy of `data`, ordered by IEEE-754 total
/// order so NaNs sort deterministically instead of poisoning the sort.
fn sorted_copy(data: &[f64]) -> Vec<f64> {
    let mut out: Vec<f64> = data.to_vec();
    out.sort_by(|a, b| a.total_cmp(b));
    out
}
36
37pub fn variance(data: &[f64]) -> Result<f64, String> {
45 if data.is_empty() {
46 return Err("variance: empty data".into());
47 }
48 if data.len() == 1 {
49 return Ok(0.0);
50 }
51 let mean = binned_mean(data);
52 let mut acc = BinnedAccumulatorF64::new();
53 for &x in data {
54 let d = x - mean;
55 acc.add(d * d);
56 }
57 Ok(acc.finalize() / (data.len() - 1) as f64)
58}
59
60pub fn sample_variance(data: &[f64]) -> Result<f64, String> {
62 variance(data)
63}
64
65pub fn pop_variance(data: &[f64]) -> Result<f64, String> {
67 if data.is_empty() {
68 return Err("pop_variance: empty data".into());
69 }
70 let mean = binned_mean(data);
71 let mut acc = BinnedAccumulatorF64::new();
72 for &x in data {
73 let d = x - mean;
74 acc.add(d * d);
75 }
76 Ok(acc.finalize() / data.len() as f64)
77}
78
79pub fn sd(data: &[f64]) -> Result<f64, String> {
81 variance(data).map(|v| v.sqrt())
82}
83
84pub fn sample_sd(data: &[f64]) -> Result<f64, String> {
86 sd(data)
87}
88
89pub fn pop_sd(data: &[f64]) -> Result<f64, String> {
91 pop_variance(data).map(|v| v.sqrt())
92}
93
94pub fn se(data: &[f64]) -> Result<f64, String> {
96 let s = sample_sd(data)?;
97 Ok(s / (data.len() as f64).sqrt())
98}
99
100pub fn median(data: &[f64]) -> Result<f64, String> {
104 if data.is_empty() {
105 return Ok(f64::NAN);
106 }
107 let sorted = sorted_copy(data);
108 let n = sorted.len();
109 if n % 2 == 1 {
110 Ok(sorted[n / 2])
111 } else {
112 Ok((sorted[n / 2 - 1] + sorted[n / 2]) / 2.0)
113 }
114}
115
116pub fn quantile(data: &[f64], p: f64) -> Result<f64, String> {
119 if data.is_empty() {
120 return Err("quantile: empty data".into());
121 }
122 if !(0.0..=1.0).contains(&p) {
123 return Err(format!("quantile: p must be in [0, 1], got {p}"));
124 }
125 let sorted = sorted_copy(data);
126 let n = sorted.len();
127 if n == 1 {
128 return Ok(sorted[0]);
129 }
130 let idx = (n as f64 - 1.0) * p;
132 let lo = idx.floor() as usize;
133 let hi = idx.ceil() as usize;
134 let frac = idx - lo as f64;
135 if lo == hi || hi >= n {
136 Ok(sorted[lo])
137 } else {
138 Ok(sorted[lo] * (1.0 - frac) + sorted[hi] * frac)
139 }
140}
141
142pub fn iqr(data: &[f64]) -> Result<f64, String> {
144 let q1 = quantile(data, 0.25)?;
145 let q3 = quantile(data, 0.75)?;
146 Ok(q3 - q1)
147}
148
149pub fn skewness(data: &[f64]) -> Result<f64, String> {
151 if data.len() < 3 {
152 return Err("skewness: need at least 3 elements".into());
153 }
154 let mean = binned_mean(data);
155 let n = data.len() as f64;
156 let mut m2_acc = BinnedAccumulatorF64::new();
157 let mut m3_acc = BinnedAccumulatorF64::new();
158 for &x in data {
159 let d = x - mean;
160 m2_acc.add(d * d);
161 m3_acc.add(d * d * d);
162 }
163 let m2 = m2_acc.finalize() / n;
164 let m3 = m3_acc.finalize() / n;
165 let sigma3 = m2.powf(1.5);
166 if sigma3 == 0.0 {
167 return Err("skewness: zero variance".into());
168 }
169 Ok(m3 / sigma3)
170}
171
172pub fn kurtosis(data: &[f64]) -> Result<f64, String> {
174 if data.len() < 4 {
175 return Err("kurtosis: need at least 4 elements".into());
176 }
177 let mean = binned_mean(data);
178 let n = data.len() as f64;
179 let mut m2_acc = BinnedAccumulatorF64::new();
180 let mut m4_acc = BinnedAccumulatorF64::new();
181 for &x in data {
182 let d = x - mean;
183 let d2 = d * d;
184 m2_acc.add(d2);
185 m4_acc.add(d2 * d2);
186 }
187 let m2 = m2_acc.finalize() / n;
188 let m4 = m4_acc.finalize() / n;
189 if m2 == 0.0 {
190 return Err("kurtosis: zero variance".into());
191 }
192 Ok(m4 / (m2 * m2) - 3.0)
193}
194
195pub fn z_score(data: &[f64]) -> Result<Vec<f64>, String> {
197 if data.is_empty() {
198 return Err("z_score: empty data".into());
199 }
200 let mean = binned_mean(data);
201 let s = sd(data)?;
202 if s == 0.0 {
203 return Err("z_score: zero standard deviation".into());
204 }
205 Ok(data.iter().map(|&x| (x - mean) / s).collect())
206}
207
/// Min-max scales `data` into `[0, 1]`.
///
/// Note: despite the name this is min-max normalization, not z-scoring
/// (see `z_score` for that). A constant input maps to all zeros.
///
/// # Errors
/// Returns `Err` for empty input.
pub fn standardize(data: &[f64]) -> Result<Vec<f64>, String> {
    if data.is_empty() {
        return Err("standardize: empty data".into());
    }
    let mut lo = f64::INFINITY;
    let mut hi = f64::NEG_INFINITY;
    for &v in data {
        if v < lo {
            lo = v;
        }
        if v > hi {
            hi = v;
        }
    }
    let span = hi - lo;
    if span == 0.0 {
        return Ok(vec![0.0; data.len()]);
    }
    Ok(data.iter().map(|&v| (v - lo) / span).collect())
}
225
/// Counts distinct values by bit pattern (so `0.0` and `-0.0` differ,
/// and distinct NaN payloads count separately — consistent with the
/// total-order comparisons used elsewhere in this module).
pub fn n_distinct(data: &[f64]) -> usize {
    // Sorting the raw bit patterns puts equal bits adjacent, which is all
    // dedup needs; the numeric order itself is irrelevant to the count.
    let mut bits: Vec<u64> = data.iter().map(|v| v.to_bits()).collect();
    bits.sort_unstable();
    bits.dedup();
    bits.len()
}
241
242pub fn cor(x: &[f64], y: &[f64]) -> Result<f64, String> {
249 if x.len() != y.len() {
250 return Err("cor: arrays must have same length".into());
251 }
252 if x.len() < 2 {
253 return Err("cor: need at least 2 elements".into());
254 }
255 let mean_x = binned_mean(x);
256 let mean_y = binned_mean(y);
257 let mut cov_acc = BinnedAccumulatorF64::new();
258 let mut var_x_acc = BinnedAccumulatorF64::new();
259 let mut var_y_acc = BinnedAccumulatorF64::new();
260 for i in 0..x.len() {
261 let dx = x[i] - mean_x;
262 let dy = y[i] - mean_y;
263 cov_acc.add(dx * dy);
264 var_x_acc.add(dx * dx);
265 var_y_acc.add(dy * dy);
266 }
267 let denom = (var_x_acc.finalize() * var_y_acc.finalize()).sqrt();
268 if denom == 0.0 {
269 return Err("cor: zero variance in one or both arrays".into());
270 }
271 Ok(cov_acc.finalize() / denom)
272}
273
274pub fn cov(x: &[f64], y: &[f64]) -> Result<f64, String> {
276 if x.len() != y.len() {
277 return Err("cov: arrays must have same length".into());
278 }
279 if x.is_empty() {
280 return Err("cov: empty data".into());
281 }
282 let mean_x = binned_mean(x);
283 let mean_y = binned_mean(y);
284 let mut acc = BinnedAccumulatorF64::new();
285 for i in 0..x.len() {
286 acc.add((x[i] - mean_x) * (y[i] - mean_y));
287 }
288 Ok(acc.finalize() / x.len() as f64)
289}
290
291pub fn sample_cov(x: &[f64], y: &[f64]) -> Result<f64, String> {
293 if x.len() != y.len() {
294 return Err("sample_cov: arrays must have same length".into());
295 }
296 if x.len() < 2 {
297 return Err("sample_cov: need at least 2 elements".into());
298 }
299 let mean_x = binned_mean(x);
300 let mean_y = binned_mean(y);
301 let mut acc = BinnedAccumulatorF64::new();
302 for i in 0..x.len() {
303 acc.add((x[i] - mean_x) * (y[i] - mean_y));
304 }
305 Ok(acc.finalize() / (x.len() - 1) as f64)
306}
307
308pub fn cor_matrix(vars: &[&[f64]]) -> Result<Vec<f64>, String> {
311 let n = vars.len();
312 if n == 0 {
313 return Err("cor_matrix: no variables".into());
314 }
315 let len = vars[0].len();
316 for v in vars {
317 if v.len() != len {
318 return Err("cor_matrix: all variables must have same length".into());
319 }
320 }
321 let mut result = vec![0.0; n * n];
322 for i in 0..n {
323 result[i * n + i] = 1.0; for j in (i + 1)..n {
325 let r = cor(vars[i], vars[j])?;
326 result[i * n + j] = r;
327 result[j * n + i] = r;
328 }
329 }
330 Ok(result)
331}
332
333pub fn cov_matrix(vars: &[&[f64]]) -> Result<Vec<f64>, String> {
336 let n = vars.len();
337 if n == 0 {
338 return Err("cov_matrix: no variables".into());
339 }
340 let len = vars[0].len();
341 for v in vars {
342 if v.len() != len {
343 return Err("cov_matrix: all variables must have same length".into());
344 }
345 }
346 let mut result = vec![0.0; n * n];
347 for i in 0..n {
348 for j in i..n {
349 let c = cov(vars[i], vars[j])?;
350 result[i * n + j] = c;
351 result[j * n + i] = c;
352 }
353 }
354 Ok(result)
355}
356
357pub fn cumsum(data: &[f64]) -> Vec<f64> {
363 let mut acc = KahanAccumulatorF64::new();
364 let mut result = Vec::with_capacity(data.len());
365 for &x in data {
366 acc.add(x);
367 result.push(acc.finalize());
368 }
369 result
370}
371
/// Cumulative products: element `i` is the product of `data[..=i]`.
pub fn cumprod(data: &[f64]) -> Vec<f64> {
    let mut running = 1.0_f64;
    data.iter()
        .map(|&v| {
            running *= v;
            running
        })
        .collect()
}
382
/// Running maximum: element `i` is the largest value seen in `data[..=i]`.
/// The `>` comparison deliberately ignores NaN inputs (NaN never replaces
/// the running value), matching the original behavior.
pub fn cummax(data: &[f64]) -> Vec<f64> {
    let mut best = f64::NEG_INFINITY;
    data.iter()
        .map(|&v| {
            if v > best {
                best = v;
            }
            best
        })
        .collect()
}
393
/// Running minimum: element `i` is the smallest value seen in `data[..=i]`.
/// The `<` comparison deliberately ignores NaN inputs, matching `cummax`.
pub fn cummin(data: &[f64]) -> Vec<f64> {
    let mut best = f64::INFINITY;
    data.iter()
        .map(|&v| {
            if v < best {
                best = v;
            }
            best
        })
        .collect()
}
404
/// Shifts `data` back by `n` positions: element `i` of the result is
/// `data[i - n]`, with the first `n` positions padded with NaN.
pub fn lag(data: &[f64], n: usize) -> Vec<f64> {
    (0..data.len())
        .map(|i| if i < n { f64::NAN } else { data[i - n] })
        .collect()
}
417
/// Shifts `data` forward by `n` positions: element `i` of the result is
/// `data[i + n]`, with the last `n` positions (or the whole vector when
/// `n >= data.len()`) padded with NaN.
///
/// Fix: the previous implementation tested `i + n < data.len()`, and `i + n`
/// overflows `usize` for very large `n` (panicking in debug builds). The
/// cutoff is now derived once with `saturating_sub`, which cannot overflow,
/// and `i + n` is only evaluated when it is provably in bounds.
pub fn lead(data: &[f64], n: usize) -> Vec<f64> {
    // Indices below `cutoff` have a source element; the rest are padding.
    let cutoff = data.len().saturating_sub(n);
    let mut result = Vec::with_capacity(data.len());
    for i in 0..data.len() {
        if i < cutoff {
            // i < cutoff <= len - n, so i + n < len: no overflow, in bounds.
            result.push(data[i + n]);
        } else {
            result.push(f64::NAN);
        }
    }
    result
}
430
/// Fractional ranks (1-based): tied values (bit-identical under total
/// order) all receive the average of the rank positions they occupy.
pub fn rank(data: &[f64]) -> Vec<f64> {
    let n = data.len();
    let mut order: Vec<(usize, f64)> = data.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1));
    let mut out = vec![0.0; n];
    let mut start = 0;
    while start < n {
        // Extend `end` over the run of values bit-identical to order[start].
        let mut end = start;
        while end < n && order[end].1.to_bits() == order[start].1.to_bits() {
            end += 1;
        }
        // Tied values share the mean of the 1-based ranks start+1..=end.
        let total: f64 = (start + 1..=end).map(|r| r as f64).sum();
        let shared = total / (end - start) as f64;
        for &(idx, _) in &order[start..end] {
            out[idx] = shared;
        }
        start = end;
    }
    out
}
453
/// Dense ranks (1-based): tied values share a rank and the next distinct
/// value gets the next consecutive rank (no gaps).
pub fn dense_rank(data: &[f64]) -> Vec<f64> {
    let n = data.len();
    let mut order: Vec<(usize, f64)> = data.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1));
    let mut out = vec![0.0; n];
    let mut level = 1.0;
    for pos in 0..n {
        // Bump the rank only when the sorted value actually changes.
        if pos > 0 && order[pos].1.to_bits() != order[pos - 1].1.to_bits() {
            level += 1.0;
        }
        out[order[pos].0] = level;
    }
    out
}
469
/// Strict 1-based ordering ranks: ties are broken by original position,
/// so every element gets a unique rank.
pub fn row_number(data: &[f64]) -> Vec<f64> {
    let mut order: Vec<(usize, f64)> = data.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));
    let mut out = vec![0.0; data.len()];
    for (pos, &(idx, _)) in order.iter().enumerate() {
        out[idx] = (pos + 1) as f64;
    }
    out
}
481
/// Equal-width histogram: returns `n_bins + 1` bin edges and per-bin counts.
/// Values on the upper boundary fall into the last bin. A constant input
/// uses a nominal width of 1.0 and puts everything in bin 0.
///
/// # Errors
/// Returns `Err` for empty input or `n_bins == 0`.
pub fn histogram(data: &[f64], n_bins: usize) -> Result<(Vec<f64>, Vec<usize>), String> {
    if data.is_empty() {
        return Err("histogram: empty data".into());
    }
    if n_bins == 0 {
        return Err("histogram: n_bins must be > 0".into());
    }
    let mut lo = f64::INFINITY;
    let mut hi = f64::NEG_INFINITY;
    for &v in data {
        if v < lo {
            lo = v;
        }
        if v > hi {
            hi = v;
        }
    }
    let span = hi - lo;
    let width = if span == 0.0 { 1.0 } else { span / n_bins as f64 };
    let edges: Vec<f64> = (0..=n_bins).map(|i| lo + width * i as f64).collect();
    let mut counts = vec![0usize; n_bins];
    for &v in data {
        let bin = if span == 0.0 {
            0
        } else {
            // Clamp so the maximum value lands in the last bin, not past it.
            (((v - lo) / width).floor() as usize).min(n_bins - 1)
        };
        counts[bin] += 1;
    }
    Ok((edges, counts))
}
515
516pub fn weighted_mean(data: &[f64], weights: &[f64]) -> Result<f64, String> {
523 if data.len() != weights.len() {
524 return Err("weighted_mean: data and weights must have same length".into());
525 }
526 if data.is_empty() {
527 return Err("weighted_mean: empty data".into());
528 }
529 let mut num_acc = BinnedAccumulatorF64::new();
530 let mut den_acc = BinnedAccumulatorF64::new();
531 for i in 0..data.len() {
532 num_acc.add(data[i] * weights[i]);
533 den_acc.add(weights[i]);
534 }
535 let denom = den_acc.finalize();
536 if denom == 0.0 {
537 return Err("weighted_mean: weights sum to zero".into());
538 }
539 Ok(num_acc.finalize() / denom)
540}
541
542pub fn weighted_var(data: &[f64], weights: &[f64]) -> Result<f64, String> {
545 if data.len() != weights.len() {
546 return Err("weighted_var: data and weights must have same length".into());
547 }
548 if data.is_empty() {
549 return Err("weighted_var: empty data".into());
550 }
551 let wm = weighted_mean(data, weights)?;
552 let mut num_acc = BinnedAccumulatorF64::new();
553 let mut den_acc = BinnedAccumulatorF64::new();
554 for i in 0..data.len() {
555 let d = data[i] - wm;
556 num_acc.add(weights[i] * d * d);
557 den_acc.add(weights[i]);
558 }
559 let denom = den_acc.finalize();
560 if denom == 0.0 {
561 return Err("weighted_var: weights sum to zero".into());
562 }
563 Ok(num_acc.finalize() / denom)
564}
565
566pub fn trimmed_mean(data: &[f64], proportion: f64) -> Result<f64, String> {
569 if data.is_empty() {
570 return Err("trimmed_mean: empty data".into());
571 }
572 if proportion < 0.0 || proportion >= 0.5 {
573 return Err("trimmed_mean: proportion must be in [0, 0.5)".into());
574 }
575 let sorted = sorted_copy(data);
576 let n = sorted.len();
577 let trim = (n as f64 * proportion).floor() as usize;
578 let trimmed = &sorted[trim..n - trim];
579 if trimmed.is_empty() {
580 return Err("trimmed_mean: all data trimmed".into());
581 }
582 Ok(binned_mean(trimmed))
583}
584
585pub fn winsorize(data: &[f64], proportion: f64) -> Result<Vec<f64>, String> {
588 if data.is_empty() {
589 return Err("winsorize: empty data".into());
590 }
591 if proportion < 0.0 || proportion >= 0.5 {
592 return Err("winsorize: proportion must be in [0, 0.5)".into());
593 }
594 if proportion == 0.0 {
595 return Ok(data.to_vec());
596 }
597 let lo = quantile(data, proportion)?;
598 let hi = quantile(data, 1.0 - proportion)?;
599 Ok(data.iter().map(|&x| {
600 if x < lo { lo } else if x > hi { hi } else { x }
601 }).collect())
602}
603
604pub fn mad(data: &[f64]) -> Result<f64, String> {
607 if data.is_empty() {
608 return Err("mad: empty data".into());
609 }
610 let med = median(data)?;
611 let abs_devs: Vec<f64> = data.iter().map(|&x| (x - med).abs()).collect();
612 median(&abs_devs)
613}
614
/// Most frequent value (by bit pattern). Ties go to the value that sorts
/// first under total order, i.e. the smallest tied value.
///
/// # Errors
/// Returns `Err` for empty input.
pub fn mode(data: &[f64]) -> Result<f64, String> {
    if data.is_empty() {
        return Err("mode: empty data".into());
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(f64::total_cmp);
    let n = sorted.len();
    // Scan runs of bit-identical values; strict `>` keeps the earliest
    // (smallest) run on a count tie.
    let mut best = (sorted[0], 0usize);
    let mut start = 0;
    while start < n {
        let mut end = start + 1;
        while end < n && sorted[end].to_bits() == sorted[start].to_bits() {
            end += 1;
        }
        if end - start > best.1 {
            best = (sorted[start], end - start);
        }
        start = end;
    }
    Ok(best.0)
}
643
/// Mid-rank percentile of `value` within `data`:
/// (count_below + 0.5 * count_equal) / n, using total-order comparison.
///
/// # Errors
/// Returns `Err` for empty input.
pub fn percentile_rank(data: &[f64], value: f64) -> Result<f64, String> {
    use std::cmp::Ordering;
    if data.is_empty() {
        return Err("percentile_rank: empty data".into());
    }
    let below = data.iter().filter(|x| x.total_cmp(&value) == Ordering::Less).count();
    let equal = data.iter().filter(|x| x.total_cmp(&value) == Ordering::Equal).count();
    Ok((below as f64 + 0.5 * equal as f64) / data.len() as f64)
}
662
663pub fn spearman_cor(x: &[f64], y: &[f64]) -> Result<f64, String> {
669 if x.len() != y.len() {
670 return Err("spearman_cor: arrays must have same length".into());
671 }
672 if x.len() < 2 {
673 return Err("spearman_cor: need at least 2 elements".into());
674 }
675 let rx = rank(x);
676 let ry = rank(y);
677 cor(&rx, &ry)
678}
679
/// Kendall's tau-b rank correlation (O(n^2) pair enumeration), with the
/// standard tie correction in the denominator.
///
/// # Errors
/// Returns `Err` on length mismatch, fewer than 2 elements, or when one
/// array is entirely tied (zero denominator).
pub fn kendall_cor(x: &[f64], y: &[f64]) -> Result<f64, String> {
    use std::cmp::Ordering;
    if x.len() != y.len() {
        return Err("kendall_cor: arrays must have same length".into());
    }
    let n = x.len();
    if n < 2 {
        return Err("kendall_cor: need at least 2 elements".into());
    }
    let mut concordant: i64 = 0;
    let mut discordant: i64 = 0;
    let mut ties_x: i64 = 0;
    let mut ties_y: i64 = 0;
    for i in 0..n {
        for j in (i + 1)..n {
            let ox = x[i].total_cmp(&x[j]);
            let oy = y[i].total_cmp(&y[j]);
            // A pair tied in x counts toward ties_x, tied in y toward ties_y
            // (a pair tied in both counts toward each); only pairs untied in
            // both contribute to concordance/discordance.
            if ox == Ordering::Equal {
                ties_x += 1;
            }
            if oy == Ordering::Equal {
                ties_y += 1;
            }
            if ox != Ordering::Equal && oy != Ordering::Equal {
                if ox == oy {
                    concordant += 1;
                } else {
                    discordant += 1;
                }
            }
        }
    }
    let n0 = (n * (n - 1) / 2) as f64;
    let denom = ((n0 - ties_x as f64) * (n0 - ties_y as f64)).sqrt();
    if denom == 0.0 {
        return Err("kendall_cor: no variation in one or both arrays".into());
    }
    Ok((concordant - discordant) as f64 / denom)
}
718
719pub fn partial_cor(x: &[f64], y: &[f64], z: &[f64]) -> Result<f64, String> {
721 let rxy = cor(x, y)?;
722 let rxz = cor(x, z)?;
723 let ryz = cor(y, z)?;
724 let denom_x = (1.0 - rxz * rxz).sqrt();
725 let denom_y = (1.0 - ryz * ryz).sqrt();
726 if denom_x == 0.0 || denom_y == 0.0 {
727 return Err("partial_cor: perfect correlation with control variable".into());
728 }
729 Ok((rxy - rxz * ryz) / (denom_x * denom_y))
730}
731
732pub fn cor_ci(x: &[f64], y: &[f64], alpha: f64) -> Result<(f64, f64), String> {
735 if x.len() != y.len() {
736 return Err("cor_ci: arrays must have same length".into());
737 }
738 let n = x.len();
739 if n < 4 {
740 return Err("cor_ci: need at least 4 elements".into());
741 }
742 if alpha <= 0.0 || alpha >= 1.0 {
743 return Err("cor_ci: alpha must be in (0, 1)".into());
744 }
745 let r = cor(x, y)?;
746 let z_r = r.atanh(); let se = 1.0 / ((n as f64 - 3.0).sqrt());
748 let z_crit = crate::distributions::normal_ppf(1.0 - alpha / 2.0)?;
749 let lo = (z_r - z_crit * se).tanh();
750 let hi = (z_r + z_crit * se).tanh();
751 Ok((lo, hi))
752}
753
/// Assigns each element a bucket number in `1..=n`, by sorted position
/// (ties broken by original index), as evenly as integer division allows.
///
/// # Errors
/// Returns `Err` for empty input or `n == 0`.
pub fn ntile(data: &[f64], n: usize) -> Result<Vec<f64>, String> {
    if data.is_empty() {
        return Err("ntile: empty data".into());
    }
    if n == 0 {
        return Err("ntile: n must be > 0".into());
    }
    let len = data.len();
    let mut order: Vec<(usize, f64)> = data.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1).then(a.0.cmp(&b.0)));
    let mut out = vec![0.0; len];
    for (pos, &(idx, _)) in order.iter().enumerate() {
        // Bucket for 0-based sorted position `pos`, shifted to 1-based.
        out[idx] = (pos * n / len + 1) as f64;
    }
    Ok(out)
}
778
779pub fn percent_rank_fn(data: &[f64]) -> Result<Vec<f64>, String> {
782 if data.is_empty() {
783 return Err("percent_rank: empty data".into());
784 }
785 let n = data.len();
786 if n == 1 {
787 return Ok(vec![0.0]);
788 }
789 let ranks = rank(data);
790 Ok(ranks.iter().map(|&r| (r - 1.0) / (n as f64 - 1.0)).collect())
791}
792
/// Cumulative distribution value per element: the fraction of values less
/// than or equal to it (ties share the largest such fraction).
///
/// # Errors
/// Returns `Err` for empty input.
pub fn cume_dist(data: &[f64]) -> Result<Vec<f64>, String> {
    if data.is_empty() {
        return Err("cume_dist: empty data".into());
    }
    let n = data.len();
    let mut order: Vec<(usize, f64)> = data.iter().copied().enumerate().collect();
    order.sort_by(|a, b| a.1.total_cmp(&b.1));
    let mut out = vec![0.0; n];
    let mut start = 0;
    while start < n {
        // Find the run of bit-identical values; all of them get the
        // fraction implied by the run's last sorted position.
        let mut end = start;
        while end < n && order[end].1.to_bits() == order[start].1.to_bits() {
            end += 1;
        }
        let frac = end as f64 / n as f64;
        for &(idx, _) in &order[start..end] {
            out[idx] = frac;
        }
        start = end;
    }
    Ok(out)
}
817
/// In-place selection: partitions `data` (total order) so that index `k`
/// holds the element a full sort would put there, and returns that element.
/// Elements before `k` are <= it and elements after are >= it afterwards.
///
/// # Panics
/// Panics when `k >= data.len()` (including an empty slice); use
/// `nth_element_copy` for a checked, non-mutating variant.
pub fn nth_element(data: &mut [f64], k: usize) -> f64 {
    let (_, pivot, _) = data.select_nth_unstable_by(k, f64::total_cmp);
    *pivot
}
840
841pub fn nth_element_copy(data: &[f64], k: usize) -> Result<f64, String> {
844 if data.is_empty() {
845 return Err("nth_element: empty data".into());
846 }
847 if k >= data.len() {
848 return Err(format!("nth_element: k={k} out of bounds (n={})", data.len()));
849 }
850 let mut buf = data.to_vec();
851 Ok(nth_element(&mut buf, k))
852}
853
/// Median via selection (O(n) average) instead of a full sort.
///
/// Mirrors `median`'s contract: empty input returns `Ok(NaN)` rather than
/// an error.
/// NOTE(review): in the even-length branch, `f64::min` skips NaN operands,
/// so results for NaN-containing input may differ from the sort-based
/// `median` — confirm whether NaN inputs are expected here.
pub fn median_fast(data: &[f64]) -> Result<f64, String> {
    if data.is_empty() {
        return Ok(f64::NAN);
    }
    let n = data.len();
    let mut buf = data.to_vec();
    if n % 2 == 1 {
        Ok(nth_element(&mut buf, n / 2))
    } else {
        // Select the lower middle; selection leaves buf[n/2..] holding only
        // elements >= buf[n/2 - 1], so the minimum of that tail is the
        // upper-middle order statistic.
        let lo = nth_element(&mut buf, n / 2 - 1);
        let hi = buf[n / 2..].iter().copied().fold(f64::INFINITY, f64::min);
        Ok((lo + hi) / 2.0)
    }
}
874
/// Type-7 linear-interpolation quantile via selection instead of a full
/// sort; matches `quantile` for NaN-free input in O(n) average time.
///
/// # Errors
/// Returns `Err` for empty input or `p` outside `[0, 1]`.
pub fn quantile_fast(data: &[f64], p: f64) -> Result<f64, String> {
    if data.is_empty() {
        return Err("quantile_fast: empty data".into());
    }
    if !(0.0..=1.0).contains(&p) {
        return Err(format!("quantile_fast: p must be in [0, 1], got {p}"));
    }
    let n = data.len();
    if n == 1 {
        return Ok(data[0]);
    }
    // Fractional position on the 0-based index axis (R type-7).
    let idx = (n as f64 - 1.0) * p;
    let lo = idx.floor() as usize;
    let hi = idx.ceil() as usize;
    let frac = idx - lo as f64;

    let mut buf = data.to_vec();
    let lo_val = nth_element(&mut buf, lo);
    if lo == hi || hi >= n {
        Ok(lo_val)
    } else {
        // Selection leaves buf[lo+1..] holding only elements >= lo_val, so
        // the minimum of that tail is order statistic lo+1 (== hi here).
        // NOTE(review): f64::min skips NaN operands — confirm NaN-free input.
        let hi_val = buf[lo + 1..].iter().copied().fold(f64::INFINITY, f64::min);
        Ok(lo_val * (1.0 - frac) + hi_val * frac)
    }
}
903
/// Draws `k` indices from `0..n`, reproducibly from `seed`.
///
/// With `replace`, each draw is independent; without, a partial
/// Fisher-Yates shuffle yields `k` distinct indices. `n == 0` or `k == 0`
/// returns an empty vector.
///
/// NOTE(review): reproducibility depends on the exact sequence of
/// `rng.next_f64()` calls — do not reorder or batch the draws.
///
/// # Errors
/// Returns `Err` when `k > n` and sampling without replacement.
pub fn sample_indices(n: usize, k: usize, replace: bool, seed: u64) -> Result<Vec<usize>, String> {
    if n == 0 || k == 0 {
        return Ok(Vec::new());
    }
    let mut rng = cjc_repro::Rng::seeded(seed);

    if replace {
        let mut result = Vec::with_capacity(k);
        for _ in 0..k {
            // Scale a float draw into 0..n; the min() clamp guards the edge
            // where the draw maps exactly to n (presumably next_f64() can
            // return values arbitrarily close to 1.0 — TODO confirm range).
            let idx = (rng.next_f64() * n as f64) as usize;
            result.push(idx.min(n - 1)); }
        Ok(result)
    } else {
        if k > n {
            return Err(format!(
                "sample_indices: k={k} > n={n} without replacement"
            ));
        }
        // Partial Fisher-Yates: only the first k positions need shuffling.
        let mut pool: Vec<usize> = (0..n).collect();
        for i in 0..k {
            // Pick a swap partner from the unshuffled tail i..n, clamped
            // to stay in bounds for the same float-edge reason as above.
            let j = i + ((rng.next_f64() * (n - i) as f64) as usize).min(n - i - 1);
            pool.swap(i, j);
        }
        pool.truncate(k);
        Ok(pool)
    }
}
948
/// Keeps the elements of `data` whose corresponding `mask` entry is true.
///
/// # Errors
/// Returns `Err` when the two slices differ in length.
pub fn filter_mask(data: &[f64], mask: &[bool]) -> Result<Vec<f64>, String> {
    if data.len() != mask.len() {
        return Err(format!(
            "filter_mask: data len ({}) != mask len ({})",
            data.len(), mask.len()
        ));
    }
    let mut kept = Vec::new();
    for (&v, &keep) in data.iter().zip(mask.iter()) {
        if keep {
            kept.push(v);
        }
    }
    Ok(kept)
}
965
966#[cfg(test)]
971mod tests {
972 use super::*;
973
974 #[test]
975 fn test_variance_basic() {
976 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
978 let v = variance(&data).unwrap();
979 assert!((v - 32.0 / 7.0).abs() < 1e-12, "expected 32/7, got {v}");
981 }
982
983 #[test]
984 fn test_sd_basic() {
985 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
986 let s = sd(&data).unwrap();
987 let expected = (32.0_f64 / 7.0).sqrt();
988 assert!((s - expected).abs() < 1e-12, "expected sqrt(32/7), got {s}");
989 }
990
991 #[test]
992 fn test_median_odd() {
993 let data = [1.0, 3.0, 5.0];
994 assert_eq!(median(&data).unwrap(), 3.0);
995 }
996
997 #[test]
998 fn test_median_even() {
999 let data = [1.0, 2.0, 3.0, 4.0];
1000 assert_eq!(median(&data).unwrap(), 2.5);
1001 }
1002
1003 #[test]
1004 fn test_quantile_basic() {
1005 let data: Vec<f64> = (1..=100).map(|i| i as f64).collect();
1006 let q25 = quantile(&data, 0.25).unwrap();
1007 let q50 = quantile(&data, 0.5).unwrap();
1008 let q75 = quantile(&data, 0.75).unwrap();
1009 assert!((q50 - 50.5).abs() < 1e-12);
1010 assert!((q25 - 25.75).abs() < 1e-12);
1011 assert!((q75 - 75.25).abs() < 1e-12);
1012 }
1013
1014 #[test]
1015 fn test_skewness_symmetric() {
1016 let data = [-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0];
1018 let s = skewness(&data).unwrap();
1019 assert!(s.abs() < 1e-12, "skewness should be ~0, got {s}");
1020 }
1021
1022 #[test]
1023 fn test_kurtosis_normal() {
1024 let data: Vec<f64> = (0..1000).map(|i| -1.0 + 2.0 * (i as f64 / 999.0)).collect();
1026 let k = kurtosis(&data).unwrap();
1027 assert!((k - (-1.2)).abs() < 0.05, "excess kurtosis got {k}");
1028 }
1029
1030 #[test]
1031 fn test_z_score_basic() {
1032 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1033 let z = z_score(&data).unwrap();
1034 let z_mean: f64 = z.iter().sum::<f64>() / z.len() as f64;
1036 assert!(z_mean.abs() < 1e-12);
1037 }
1038
1039 #[test]
1040 fn test_determinism() {
1041 let data = [1.1, 2.2, 3.3, 4.4, 5.5];
1042 let v1 = variance(&data).unwrap();
1043 let v2 = variance(&data).unwrap();
1044 assert_eq!(v1.to_bits(), v2.to_bits());
1045 }
1046
1047 #[test]
1048 fn test_empty_data_error() {
1049 assert!(variance(&[]).is_err());
1050 assert!(sd(&[]).is_err());
1051 assert!(median(&[]).unwrap().is_nan());
1053 assert!(quantile(&[], 0.5).is_err());
1054 }
1055
1056 #[test]
1057 fn test_kahan_stability() {
1058 let data: Vec<f64> = (0..10000).map(|_| 0.1).collect();
1059 let v = variance(&data).unwrap();
1060 assert!(v.abs() < 1e-20, "variance of identical values should be ~0, got {v}");
1062 }
1063
1064 #[test]
1065 fn test_cor_perfect() {
1066 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1067 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1068 let r = cor(&x, &y).unwrap();
1069 assert!((r - 1.0).abs() < 1e-12);
1070 }
1071
1072 #[test]
1073 fn test_cor_negative() {
1074 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1075 let y = [10.0, 8.0, 6.0, 4.0, 2.0];
1076 let r = cor(&x, &y).unwrap();
1077 assert!((r - (-1.0)).abs() < 1e-12);
1078 }
1079
1080 #[test]
1081 fn test_cov_basic() {
1082 let x = [1.0, 2.0, 3.0];
1083 let y = [4.0, 5.0, 6.0];
1084 let c = cov(&x, &y).unwrap();
1085 assert!((c - 2.0 / 3.0).abs() < 1e-12);
1087 }
1088
1089 #[test]
1090 fn test_cumsum() {
1091 let data = [1.0, 2.0, 3.0, 4.0];
1092 assert_eq!(cumsum(&data), vec![1.0, 3.0, 6.0, 10.0]);
1093 }
1094
1095 #[test]
1096 fn test_cumprod() {
1097 let data = [1.0, 2.0, 3.0, 4.0];
1098 assert_eq!(cumprod(&data), vec![1.0, 2.0, 6.0, 24.0]);
1099 }
1100
1101 #[test]
1102 fn test_rank_basic() {
1103 let data = [3.0, 1.0, 4.0, 1.0];
1104 let r = rank(&data);
1105 assert_eq!(r, vec![3.0, 1.5, 4.0, 1.5]);
1109 }
1110
1111 #[test]
1112 fn test_dense_rank_basic() {
1113 let data = [3.0, 1.0, 4.0, 1.0];
1114 let r = dense_rank(&data);
1115 assert_eq!(r, vec![2.0, 1.0, 3.0, 1.0]);
1116 }
1117
1118 #[test]
1119 fn test_histogram_basic() {
1120 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1121 let (edges, counts) = histogram(&data, 5).unwrap();
1122 assert_eq!(edges.len(), 6);
1123 let total: usize = counts.iter().sum();
1124 assert_eq!(total, 5);
1125 }
1126
1127 #[test]
1128 fn test_lag_lead() {
1129 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1130 let lagged = lag(&data, 2);
1131 assert!(lagged[0].is_nan());
1132 assert!(lagged[1].is_nan());
1133 assert_eq!(lagged[2], 1.0);
1134
1135 let led = lead(&data, 2);
1136 assert_eq!(led[0], 3.0);
1137 assert!(led[3].is_nan());
1138 assert!(led[4].is_nan());
1139 }
1140
1141 #[test]
1142 fn test_standardize() {
1143 let data = [0.0, 5.0, 10.0];
1144 let s = standardize(&data).unwrap();
1145 assert_eq!(s, vec![0.0, 0.5, 1.0]);
1146 }
1147
1148 #[test]
1149 fn test_n_distinct() {
1150 let data = [1.0, 2.0, 2.0, 3.0, 3.0, 3.0];
1151 assert_eq!(n_distinct(&data), 3);
1152 }
1153
1154 #[test]
1157 fn test_weighted_mean_uniform() {
1158 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1159 let weights = [1.0, 1.0, 1.0, 1.0, 1.0];
1160 let wm = weighted_mean(&data, &weights).unwrap();
1161 assert!((wm - 3.0).abs() < 1e-12);
1162 }
1163
1164 #[test]
1165 fn test_weighted_mean_skewed() {
1166 let data = [1.0, 2.0, 3.0];
1167 let weights = [3.0, 0.0, 0.0];
1168 let wm = weighted_mean(&data, &weights).unwrap();
1169 assert!((wm - 1.0).abs() < 1e-12);
1170 }
1171
1172 #[test]
1173 fn test_weighted_mean_empty() {
1174 assert!(weighted_mean(&[], &[]).is_err());
1175 }
1176
1177 #[test]
1178 fn test_weighted_var_uniform() {
1179 let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1180 let weights = vec![1.0; data.len()];
1181 let wv = weighted_var(&data, &weights).unwrap();
1182 let pv = pop_variance(&data).unwrap();
1183 assert!((wv - pv).abs() < 1e-12);
1184 }
1185
1186 #[test]
1187 fn test_trimmed_mean_10pct() {
1188 let data = [100.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, -50.0];
1190 let tm = trimmed_mean(&data, 0.1).unwrap();
1191 let expected = binned_mean(&[2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
1193 assert!((tm - expected).abs() < 1e-12);
1194 }
1195
1196 #[test]
1197 fn test_trimmed_mean_zero() {
1198 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1199 let tm = trimmed_mean(&data, 0.0).unwrap();
1200 assert!((tm - 3.0).abs() < 1e-12);
1201 }
1202
1203 #[test]
1204 fn test_trimmed_mean_invalid_proportion() {
1205 assert!(trimmed_mean(&[1.0, 2.0], 0.5).is_err());
1206 assert!(trimmed_mean(&[1.0, 2.0], -0.1).is_err());
1207 }
1208
1209 #[test]
1210 fn test_winsorize_basic() {
1211 let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
1212 let w = winsorize(&data, 0.1).unwrap();
1213 assert!(w.iter().all(|&x| x >= 1.0 && x <= 10.0));
1216 }
1217
1218 #[test]
1219 fn test_winsorize_no_change() {
1220 let data = [1.0, 2.0, 3.0];
1221 let w = winsorize(&data, 0.0).unwrap();
1222 assert_eq!(w, vec![1.0, 2.0, 3.0]);
1223 }
1224
1225 #[test]
1226 fn test_mad_symmetric() {
1227 let data = [-2.0, -1.0, 0.0, 1.0, 2.0];
1228 let m = mad(&data).unwrap();
1229 assert!((m - 1.0).abs() < 1e-12);
1231 }
1232
1233 #[test]
1234 fn test_mad_constant() {
1235 let data = [5.0, 5.0, 5.0];
1236 let m = mad(&data).unwrap();
1237 assert!((m - 0.0).abs() < 1e-12);
1238 }
1239
1240 #[test]
1241 fn test_mode_simple() {
1242 let data = [1.0, 2.0, 2.0, 3.0];
1243 assert!((mode(&data).unwrap() - 2.0).abs() < 1e-12);
1244 }
1245
1246 #[test]
1247 fn test_mode_tie() {
1248 let data = [2.0, 1.0, 2.0, 1.0];
1250 assert!((mode(&data).unwrap() - 1.0).abs() < 1e-12);
1251 }
1252
1253 #[test]
1254 fn test_mode_single() {
1255 assert!((mode(&[42.0]).unwrap() - 42.0).abs() < 1e-12);
1256 }
1257
1258 #[test]
1259 fn test_percentile_rank_median() {
1260 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1261 let pr = percentile_rank(&data, 3.0).unwrap();
1262 assert!((pr - 0.5).abs() < 1e-12);
1264 }
1265
1266 #[test]
1267 fn test_percentile_rank_min() {
1268 let data = [1.0, 2.0, 3.0, 4.0, 5.0];
1269 let pr = percentile_rank(&data, 1.0).unwrap();
1270 assert!((pr - 0.1).abs() < 1e-12);
1272 }
1273
1274 #[test]
1275 fn test_b1_determinism() {
1276 let data = [1.1, 2.2, 3.3, 4.4, 5.5];
1277 let weights = [1.0, 2.0, 3.0, 4.0, 5.0];
1278 let wm1 = weighted_mean(&data, &weights).unwrap();
1279 let wm2 = weighted_mean(&data, &weights).unwrap();
1280 assert_eq!(wm1.to_bits(), wm2.to_bits());
1281 let m1 = mad(&data).unwrap();
1282 let m2 = mad(&data).unwrap();
1283 assert_eq!(m1.to_bits(), m2.to_bits());
1284 }
1285
1286 #[test]
1289 fn test_spearman_perfect_monotone() {
1290 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1291 let y = [10.0, 20.0, 30.0, 40.0, 50.0];
1292 let r = spearman_cor(&x, &y).unwrap();
1293 assert!((r - 1.0).abs() < 1e-12);
1294 }
1295
1296 #[test]
1297 fn test_spearman_perfect_reverse() {
1298 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1299 let y = [50.0, 40.0, 30.0, 20.0, 10.0];
1300 let r = spearman_cor(&x, &y).unwrap();
1301 assert!((r - (-1.0)).abs() < 1e-12);
1302 }
1303
1304 #[test]
1305 fn test_spearman_nonlinear() {
1306 let x: Vec<f64> = (1..=10).map(|i| i as f64).collect();
1308 let y: Vec<f64> = x.iter().map(|&v| v * v).collect();
1309 let r = spearman_cor(&x, &y).unwrap();
1310 assert!((r - 1.0).abs() < 1e-12); }
1312
1313 #[test]
1314 fn test_spearman_equals_pearson_for_linear() {
1315 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1316 let y = [2.0, 4.0, 6.0, 8.0, 10.0];
1317 let sp = spearman_cor(&x, &y).unwrap();
1318 let pe = cor(&x, &y).unwrap();
1319 assert!((sp - pe).abs() < 1e-12);
1320 }
1321
1322 #[test]
1323 fn test_kendall_concordant() {
1324 let x = [1.0, 2.0, 3.0, 4.0, 5.0];
1325 let y = [1.0, 2.0, 3.0, 4.0, 5.0];
1326 let t = kendall_cor(&x, &y).unwrap();
1327 assert!((t - 1.0).abs() < 1e-12);
1328 }
1329
// Every pair discordant: Kendall's tau is exactly -1.
#[test]
fn test_kendall_discordant() {
    let x = [1.0, 2.0, 3.0, 4.0, 5.0];
    let y = [5.0, 4.0, 3.0, 2.0, 1.0];
    let tau = kendall_cor(&x, &y).unwrap();
    assert!((tau + 1.0).abs() < 1e-12);
}
1337
// One tie in x attenuates tau below 1 but leaves it strongly positive.
#[test]
fn test_kendall_with_ties() {
    let x = [1.0, 2.0, 2.0, 3.0];
    let y = [1.0, 2.0, 3.0, 4.0];
    let tau = kendall_cor(&x, &y).unwrap();
    assert!(tau > 0.9 && tau < 1.0);
}
1347
// Mixed data with ties in both vectors: the association is negative.
#[test]
fn test_kendall_known_values() {
    let x = [12.0, 2.0, 1.0, 12.0, 2.0];
    let y = [1.0, 4.0, 7.0, 1.0, 0.0];
    let tau = kendall_cor(&x, &y).unwrap();
    assert!(tau < 0.0);
}
1357
// z is unrelated noise, so partialling it out leaves x-y nearly perfect.
#[test]
fn test_partial_cor_no_confounding() {
    let x = [1.0, 2.0, 3.0, 4.0, 5.0];
    let y = [2.0, 4.0, 6.0, 8.0, 10.0];
    let z = [5.0, 3.0, 1.0, 4.0, 2.0];
    let pc = partial_cor(&x, &y, &z).unwrap();
    assert!(pc > 0.95);
}
1368
// The point estimate must lie inside its own confidence interval.
#[test]
fn test_cor_ci_contains_r() {
    let x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
    let y = [2.0, 4.0, 5.0, 4.0, 5.0, 7.0, 8.0, 9.0, 10.0, 11.0];
    let r = cor(&x, &y).unwrap();
    let (lo, hi) = cor_ci(&x, &y, 0.05).unwrap();
    assert!(lo <= r && r <= hi, "r={r} should be in [{lo}, {hi}]");
}
1377
// A 99% interval must be wider than a 95% interval on the same data.
#[test]
fn test_cor_ci_95_narrower_than_99() {
    let x: Vec<f64> = (1..=20).map(|i| i as f64).collect();
    // Near-linear data with small deterministic jitter so r is not exactly 1.
    let y: Vec<f64> = x
        .iter()
        .enumerate()
        .map(|(i, &v)| {
            let jitter = if i % 3 == 0 { 0.5 } else { -0.3 };
            v * 2.0 + 1.0 + jitter
        })
        .collect();
    let width = |alpha: f64| {
        let (lo, hi) = cor_ci(&x, &y, alpha).unwrap();
        hi - lo
    };
    assert!(width(0.05) < width(0.01));
}
1389
// Rank correlations must be bitwise reproducible across calls.
#[test]
fn test_b2_determinism() {
    let x = [1.0, 2.0, 3.0, 4.0, 5.0];
    let y = [5.0, 3.0, 4.0, 2.0, 1.0];
    let first_spearman = spearman_cor(&x, &y).unwrap();
    assert_eq!(
        first_spearman.to_bits(),
        spearman_cor(&x, &y).unwrap().to_bits()
    );
    let first_kendall = kendall_cor(&x, &y).unwrap();
    assert_eq!(
        first_kendall.to_bits(),
        kendall_cor(&x, &y).unwrap().to_bits()
    );
}
1401
// Twelve values into quartiles: three per group, labels in order.
#[test]
fn test_ntile_even() {
    let data: Vec<f64> = (1..=12).map(f64::from).collect();
    let groups = ntile(&data, 4).unwrap();
    let expected: Vec<f64> = (1..=4)
        .flat_map(|g| std::iter::repeat(g as f64).take(3))
        .collect();
    assert_eq!(groups, expected);
}
1410
// Ten values into three tiles: labels in 1..=3, group sizes differ by at most one.
#[test]
fn test_ntile_uneven() {
    let data: Vec<f64> = (1..=10).map(f64::from).collect();
    let groups = ntile(&data, 3).unwrap();
    assert!(groups.iter().all(|&g| (1.0..=3.0).contains(&g)));
    for label in [1.0, 2.0, 3.0] {
        let size = groups.iter().filter(|&&g| g == label).count();
        assert!((3..=4).contains(&size));
    }
}
1425
// n equal to the length assigns every value its own group.
#[test]
fn test_ntile_n_equals_len() {
    let data = [10.0, 20.0, 30.0, 40.0, 50.0];
    let expected: Vec<f64> = (1..=5).map(f64::from).collect();
    assert_eq!(ntile(&data, 5).unwrap(), expected);
}
1432
// Empty input is rejected.
#[test]
fn test_ntile_error_empty() {
    let result = ntile(&[], 3);
    assert!(result.is_err());
}
1437
// Zero groups is rejected.
#[test]
fn test_ntile_error_zero() {
    let result = ntile(&[1.0], 0);
    assert!(result.is_err());
}
1442
// On sorted distinct data, percent ranks are evenly spaced from 0 to 1.
#[test]
fn test_percent_rank_sorted() {
    let data = [1.0, 2.0, 3.0, 4.0, 5.0];
    let pr = percent_rank_fn(&data).unwrap();
    for (i, &expected) in [0.0, 0.25, 0.5, 0.75, 1.0].iter().enumerate() {
        assert!((pr[i] - expected).abs() < 1e-12);
    }
}
1453
// Tied values share the same percent rank (here 0.5 for the two 2.0s).
#[test]
fn test_percent_rank_ties() {
    let data = [1.0, 2.0, 2.0, 4.0];
    let pr = percent_rank_fn(&data).unwrap();
    assert!(pr[0].abs() < 1e-12);
    assert!((pr[1] - pr[2]).abs() < 1e-12);
    assert!((pr[1] - 0.5).abs() < 1e-12);
}
1464
// A single observation gets rank 0.
#[test]
fn test_percent_rank_single() {
    let pr = percent_rank_fn(&[42.0]).unwrap();
    assert_eq!(pr, [0.0]);
}
1470
// Distinct sorted values: cumulative distribution climbs in steps of 1/n.
#[test]
fn test_cume_dist_sorted() {
    let data = [1.0, 2.0, 3.0, 4.0, 5.0];
    let cd = cume_dist(&data).unwrap();
    for (i, value) in cd.iter().enumerate() {
        let expected = (i + 1) as f64 / 5.0;
        assert!((value - expected).abs() < 1e-12);
    }
}
1481
// Tied values share the proportion at-or-below their common value.
#[test]
fn test_cume_dist_ties() {
    let data = [1.0, 2.0, 2.0, 4.0];
    let cd = cume_dist(&data).unwrap();
    let expected = [0.25, 0.75, 0.75, 1.0];
    for (got, want) in cd.iter().zip(expected.iter()) {
        assert!((got - want).abs() < 1e-12);
    }
}
1492
// ntile and percent_rank must be bitwise reproducible across calls.
#[test]
fn test_b5_determinism() {
    let data = [3.0, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
    let tile_runs = (ntile(&data, 4).unwrap(), ntile(&data, 4).unwrap());
    for (a, b) in tile_runs.0.iter().zip(tile_runs.1.iter()) {
        assert_eq!(a.to_bits(), b.to_bits());
    }
    let rank_runs = (
        percent_rank_fn(&data).unwrap(),
        percent_rank_fn(&data).unwrap(),
    );
    for (a, b) in rank_runs.0.iter().zip(rank_runs.1.iter()) {
        assert_eq!(a.to_bits(), b.to_bits());
    }
}
1507
// Select min, max, and the middle order statistic of a shuffled 1..=9.
#[test]
fn test_nth_element_basic() {
    let data = vec![5.0, 2.0, 8.0, 1.0, 9.0, 3.0, 7.0, 4.0, 6.0];
    for (k, want) in [(0, 1.0), (8, 9.0), (4, 5.0)] {
        let mut scratch = data.clone();
        assert_eq!(nth_element(&mut scratch, k), want);
    }
}
1522
// Non-mutating selection over a shuffled array.
#[test]
fn test_nth_element_copy_basic() {
    let data = [5.0, 2.0, 8.0, 1.0, 9.0];
    for (k, want) in [(0, 1.0), (2, 5.0), (4, 9.0)] {
        assert_eq!(nth_element_copy(&data, k).unwrap(), want);
    }
}
1530
// Empty input and out-of-range k are both rejected.
#[test]
fn test_nth_element_copy_errors() {
    let empty: &[f64] = &[];
    assert!(nth_element_copy(empty, 0).is_err());
    assert!(nth_element_copy(&[1.0], 1).is_err());
}
1536
// A one-element slice selects its only value.
#[test]
fn test_nth_element_single() {
    let mut only = vec![42.0];
    let picked = nth_element(&mut only, 0);
    assert_eq!(picked, 42.0);
}
1542
// With duplicates, the k-th order statistic matches the sorted multiset.
#[test]
fn test_nth_element_duplicates() {
    let data = vec![3.0, 1.0, 3.0, 1.0, 2.0];
    let sorted = [1.0, 1.0, 2.0, 3.0, 3.0];
    for (k, &want) in sorted.iter().enumerate() {
        let mut scratch = data.clone();
        assert_eq!(nth_element(&mut scratch, k), want);
    }
}
1552
// NaN is expected to order after all finite values (consistent with
// f64::total_cmp), so rank 0 is the finite minimum and rank 3 is NaN.
#[test]
fn test_nth_element_nan_handling() {
    let mut low = vec![3.0, f64::NAN, 1.0, 2.0];
    assert_eq!(nth_element(&mut low, 0), 1.0);
    let mut high = vec![3.0, f64::NAN, 1.0, 2.0];
    assert!(nth_element(&mut high, 3).is_nan());
}
1563
// Odd-length inputs: the median is the middle order statistic.
#[test]
fn test_median_fast_odd() {
    let cases: [(&[f64], f64); 2] = [
        (&[3.0, 1.0, 2.0], 2.0),
        (&[5.0, 1.0, 9.0, 3.0, 7.0], 5.0),
    ];
    for (data, want) in cases {
        assert!((median_fast(data).unwrap() - want).abs() < 1e-15);
    }
}
1569
// Even-length inputs: the median averages the two middle order statistics.
#[test]
fn test_median_fast_even() {
    let cases: [(&[f64], f64); 2] = [
        (&[4.0, 1.0, 3.0, 2.0], 2.5),
        (&[6.0, 1.0, 5.0, 2.0, 4.0, 3.0], 3.5),
    ];
    for (data, want) in cases {
        assert!((median_fast(data).unwrap() - want).abs() < 1e-15);
    }
}
1575
// median_fast must agree bit-for-bit with the sort-based median, for
// both odd and even lengths.
#[test]
fn test_median_fast_parity_with_sort_median() {
    let odd = [7.0, 2.0, 9.0, 4.0, 5.0, 1.0, 8.0, 3.0, 6.0];
    let slow = median(&odd).unwrap();
    let fast = median_fast(&odd).unwrap();
    assert_eq!(slow.to_bits(), fast.to_bits(), "median_fast != median for odd");

    let even = [7.0, 2.0, 9.0, 4.0, 5.0, 1.0, 8.0, 3.0];
    let slow2 = median(&even).unwrap();
    let fast2 = median_fast(&even).unwrap();
    assert_eq!(slow2.to_bits(), fast2.to_bits(), "median_fast != median for even");
}
1589
// Empty input yields NaN rather than an error (matches `median`).
#[test]
fn test_median_fast_empty() {
    let result = median_fast(&[]).unwrap();
    assert!(result.is_nan());
}
1594
// Endpoints and midpoint of 1..=5.
#[test]
fn test_quantile_fast_basic() {
    let data = [1.0, 2.0, 3.0, 4.0, 5.0];
    for (p, want) in [(0.0, 1.0), (0.5, 3.0), (1.0, 5.0)] {
        assert!((quantile_fast(&data, p).unwrap() - want).abs() < 1e-15);
    }
}
1602
// Sweep p over a fine grid; both implementations must agree closely.
#[test]
fn test_quantile_fast_parity_with_sort_quantile() {
    let data = [7.0, 2.0, 9.0, 4.0, 5.0, 1.0, 8.0, 3.0, 6.0];
    for step in 0..=20 {
        let p = step as f64 / 20.0;
        let slow = quantile(&data, p).unwrap();
        let fast = quantile_fast(&data, p).unwrap();
        assert!(
            (slow - fast).abs() < 1e-12,
            "quantile mismatch at p={p}: sort={slow}, fast={fast}"
        );
    }
}
1616
// Draw 5 of 10 with replacement: right count, every index in range.
#[test]
fn test_sample_indices_with_replacement() {
    let idx = sample_indices(10, 5, true, 42).unwrap();
    assert_eq!(idx.len(), 5);
    let in_range = idx.iter().all(|&i| i < 10);
    assert!(in_range);
}
1623
// Without replacement: five distinct indices, all below 10.
#[test]
fn test_sample_indices_without_replacement() {
    let idx = sample_indices(10, 5, false, 42).unwrap();
    assert_eq!(idx.len(), 5);
    assert!(idx.iter().all(|&i| i < 10));
    let distinct: std::collections::HashSet<_> = idx.iter().collect();
    assert_eq!(distinct.len(), 5);
}
1635
// Drawing all 5 of 5 without replacement yields a permutation of 0..5.
#[test]
fn test_sample_indices_full_draw() {
    let idx = sample_indices(5, 5, false, 42).unwrap();
    assert_eq!(idx.len(), 5);
    let mut ordered = idx.clone();
    ordered.sort();
    assert_eq!(ordered, vec![0, 1, 2, 3, 4]);
}
1644
// The same seed must reproduce the exact same draw.
#[test]
fn test_sample_indices_determinism() {
    let first = sample_indices(100, 20, false, 12345).unwrap();
    let second = sample_indices(100, 20, false, 12345).unwrap();
    assert_eq!(first, second, "same seed must produce identical indices");
}
1651
// Requesting more than the population without replacement is an error.
#[test]
fn test_sample_indices_error() {
    let result = sample_indices(5, 10, false, 0);
    assert!(result.is_err());
}
1656
// Empty population or a zero-size draw both yield an empty sample.
#[test]
fn test_sample_indices_empty() {
    assert!(sample_indices(0, 5, true, 0).unwrap().is_empty());
    assert!(sample_indices(5, 0, true, 0).unwrap().is_empty());
}
1662
// The mask keeps exactly the positions marked true.
#[test]
fn test_filter_mask_basic() {
    let data = [1.0, 2.0, 3.0, 4.0, 5.0];
    let mask = [true, false, true, false, true];
    let kept = filter_mask(&data, &mask).unwrap();
    assert_eq!(kept, vec![1.0, 3.0, 5.0]);
}
1669
// An all-true mask is the identity.
#[test]
fn test_filter_mask_all_true() {
    let data = [1.0, 2.0, 3.0];
    let kept = filter_mask(&data, &[true; 3]).unwrap();
    assert_eq!(kept, data.to_vec());
}
1676
// An all-false mask drops everything.
#[test]
fn test_filter_mask_all_false() {
    let data = [1.0, 2.0, 3.0];
    let kept = filter_mask(&data, &[false; 3]).unwrap();
    assert!(kept.is_empty());
}
1683
// A mask whose length differs from the data is rejected.
#[test]
fn test_filter_mask_length_mismatch() {
    let result = filter_mask(&[1.0, 2.0], &[true]);
    assert!(result.is_err());
}
1688}