1pub fn ks_test<F: Fn(f64) -> f64>(xs: &[f64], cdf: F) -> f64 {
6 let mut xs_r: Vec<f64> = xs.to_vec();
7 xs_r.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
8
9 let n: f64 = xs_r.len() as f64;
10 xs_r.iter().enumerate().fold(0.0, |acc, (i, &x)| {
11 let diff = ((i as f64 + 1.0) / n - cdf(x)).abs();
12 if diff > acc {
13 diff
14 } else {
15 acc
16 }
17 })
18}
19
20fn empirical_cdf(xs: &[f64], vals: &[f64]) -> Vec<f64> {
23 let n: f64 = xs.len() as f64;
24 let mut cdf: Vec<f64> = Vec::with_capacity(vals.len());
25 let mut ix: usize = 0;
26 vals.iter().for_each(|y| {
27 while ix < xs.len() && *y > xs[ix] {
28 ix += 1;
29 }
30 let p = (ix as f64) / n;
31 cdf.push(p);
32 });
33 cdf
34}
35
36pub fn ks2sample(mut xs: Vec<f64>, mut ys: Vec<f64>) -> f64 {
38 let mut all_vals = xs.clone();
39 all_vals.extend(ys.clone());
40
41 xs.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
42 ys.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
43 all_vals.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
44
45 let cdf_x = empirical_cdf(&xs, &all_vals);
46 let cdf_y = empirical_cdf(&ys, &all_vals);
47
48 cdf_x.iter().zip(cdf_y).fold(0.0, |acc, (px, py)| {
49 let diff = (px - py).abs();
50 if diff > acc {
51 diff
52 } else {
53 acc
54 }
55 })
56}
57
58#[cfg(test)]
59mod tests {
60 use super::*;
61 use approx::*;
62
63 const TOL: f64 = 1E-8;
64
65 #[test]
66 fn empirical_cdf_when_xs_and_vals_same_length() {
67 let vals: Vec<f64> = vec![0.0, 1.5, 2.1, 3.0];
68 let xs: Vec<f64> = vec![1.0, 1.0, 2.0, 2.5];
69
70 let cdf = empirical_cdf(&xs, &vals);
71
72 assert_relative_eq!(cdf[0], 0.0, epsilon = TOL);
73 assert_relative_eq!(cdf[1], 0.5, epsilon = TOL);
74 assert_relative_eq!(cdf[2], 0.75, epsilon = TOL);
75 assert_relative_eq!(cdf[3], 1.0, epsilon = TOL);
76 }
77
78 #[test]
79 fn empirical_cdf_when_xs_all_same_value() {
80 let vals: Vec<f64> = vec![0.0, 1.5, 2.1, 3.0];
81 let xs: Vec<f64> = vec![1.0, 1.0, 1.0, 1.0];
82
83 let cdf = empirical_cdf(&xs, &vals);
84
85 assert_relative_eq!(cdf[0], 0.0, epsilon = TOL);
86 assert_relative_eq!(cdf[1], 1.0, epsilon = TOL);
87 assert_relative_eq!(cdf[2], 1.0, epsilon = TOL);
88 assert_relative_eq!(cdf[3], 1.0, epsilon = TOL);
89 }
90
91 #[test]
92 fn ks2sample_stat_should_be_zero_when_samples_are_identical() {
93 let xs: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0];
94 let ys: Vec<f64> = vec![2.0, 1.0, 4.0, 3.0];
95
96 assert_relative_eq!(0.0, ks2sample(xs, ys), epsilon = TOL);
97 }
98
99 #[test]
100 fn ks2sample_stat_simple_value_test_1() {
101 let xs: Vec<f64> = vec![1.0, 1.0, 4.0, 4.0];
102 let ys: Vec<f64> = vec![1.0, 1.0, 1.0, 4.0];
103
104 assert_relative_eq!(0.25, ks2sample(xs, ys), epsilon = TOL);
105 }
106
107 #[test]
108 fn ks2sample_stat_simple_value_test_2() {
109 let xs: Vec<f64> =
110 vec![0.42, 0.24, 0.86, 0.85, 0.82, 0.82, 0.25, 0.78, 0.13, 0.27];
111 let ys: Vec<f64> =
112 vec![0.24, 0.27, 0.87, 0.29, 0.57, 0.44, 0.5, 0.00, 0.56, 0.03];
113
114 assert_relative_eq!(0.4, ks2sample(xs, ys), epsilon = TOL);
115 }
116}