breakout/
edmx.rs

1// Robust estimation of 2[mean(X)-mean(Y)]^2 time normalization factor
2// This is the E-Divisive E-statistic when alpha = 2
3// Instead of calculating mean(X), we calculate median(X), and similarly for Y
4
5use std::cmp::Ordering;
6use std::collections::BinaryHeap;
7
8#[derive(PartialEq)]
9struct MaxItem(f64);
10
11impl Eq for MaxItem {}
12
13impl PartialOrd for MaxItem {
14    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
15        self.0.partial_cmp(&other.0)
16    }
17}
18
19impl Ord for MaxItem {
20    fn cmp(&self, other: &MaxItem) -> Ordering {
21        self.partial_cmp(other).unwrap()
22    }
23}
24
25#[derive(PartialEq)]
26struct MinItem(f64);
27
28impl Eq for MinItem {}
29
30impl PartialOrd for MinItem {
31    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
32        other.0.partial_cmp(&self.0)
33    }
34}
35
36impl Ord for MinItem {
37    fn cmp(&self, other: &MinItem) -> Ordering {
38        self.partial_cmp(other).unwrap()
39    }
40}
41
42fn get_median(m: &BinaryHeap<MinItem>, m2: &BinaryHeap<MaxItem>) -> f64 {
43    match m.len().cmp(&m2.len()) {
44        Ordering::Greater => m.peek().unwrap().0,
45        Ordering::Less => m2.peek().unwrap().0,
46        Ordering::Equal => (m.peek().unwrap().0 + m2.peek().unwrap().0) / 2.0,
47    }
48}
49
50fn add_to_heaps(m: &mut BinaryHeap<MinItem>, m2: &mut BinaryHeap<MaxItem>, x: f64) {
51    // decide on initial heap to place Item into
52    if m.is_empty() || x < m.peek().unwrap().0 {
53        m2.push(MaxItem(x));
54    } else {
55        m.push(MinItem(x));
56    }
57
58    // make sure that heaps are balanced
59    if m.len() > m2.len() + 1 {
60        m2.push(MaxItem(m.pop().unwrap().0));
61    } else if m2.len() > m.len() + 1 {
62        m.push(MinItem(m2.pop().unwrap().0));
63    }
64}
65
66pub fn edmx(z: &[f64], min_size: usize, _alpha: f64) -> (usize, f64) {
67    let mut left_min = BinaryHeap::new();
68    let mut left_max = BinaryHeap::new();
69
70    let mut stat_best = -3.0;
71    let mut t1 = 0;
72
73    let n = z.len();
74    for i in 0..min_size - 1 {
75        add_to_heaps(&mut left_min, &mut left_max, z[i]);
76    }
77
78    // iterate over breakout locations
79    for tau1 in min_size..n - min_size + 1 {
80        add_to_heaps(&mut left_min, &mut left_max, z[tau1 - 1]);
81
82        let mut right_min = BinaryHeap::new();
83        let mut right_max = BinaryHeap::new();
84
85        let medl = get_median(&left_min, &left_max);
86
87        // add first set of Items to the heaps for the right segment
88        for i in tau1..tau1 + min_size - 1 {
89            add_to_heaps(&mut right_min, &mut right_max, z[i]);
90        }
91
92        for tau2 in tau1 + min_size..n + 1 {
93            add_to_heaps(&mut right_min, &mut right_max, z[tau2 - 1]);
94            let medr = get_median(&right_min, &right_max);
95
96            let mut stat = (medl - medr).powi(2);
97            stat *= (tau1 * (tau2 - tau1)) as f64 / tau2 as f64;
98
99            if stat > stat_best {
100                t1 = tau1;
101                stat_best = stat;
102            }
103        }
104    }
105
106    (t1, stat_best)
107}