tycho_util/time/
p2.rs

1use std::fmt::{Debug, Formatter};
2
3#[derive(Default)]
4pub struct P2 {
5    quantile: f64,
6    heights: [i64; 5],
7    pos: [i64; 5],
8    n_pos: [f64; 5],
9    dn: [f64; 5],
10    count: usize,
11    filled: bool,
12}
13
14impl P2 {
15    pub fn new(quantile: f64) -> Result<Self, Error> {
16        if !(0.0..=1.0).contains(&quantile) {
17            return Err(Error::InvalidQuantile(quantile));
18        }
19        let p2 = Self {
20            quantile,
21            heights: [0; 5],
22            n_pos: [
23                0.0,
24                2.0 * quantile,
25                4.0 * quantile,
26                2.0 + 2.0 * quantile,
27                4.0,
28            ],
29            dn: [0.0, quantile / 2.0, quantile, (1.0 + quantile) / 2.0, 1.0],
30            pos: [0, 1, 2, 3, 4],
31            count: 0,
32            filled: false,
33        };
34
35        Ok(p2)
36    }
37
38    pub fn append(&mut self, data: i64) {
39        if self.count < 5 {
40            self.heights[self.count] = data;
41            self.count += 1;
42            return;
43        }
44        if !self.filled {
45            self.filled = true;
46            self.heights.sort_unstable();
47        }
48        self.append_data(data);
49    }
50
51    fn append_data(&mut self, data: i64) {
52        let mut k: isize = -1;
53        if data < self.heights[0] {
54            k = 0;
55            self.heights[0] = data;
56        } else if self.heights[4] <= data {
57            k = 3;
58            self.heights[4] = data;
59        } else {
60            for i in 1..5 {
61                if self.heights[i - 1] <= data && data < self.heights[i] {
62                    k = i as isize - 1;
63                    break;
64                }
65            }
66        }
67
68        for i in 0..5 {
69            if i > k as usize {
70                self.pos[i] += 1;
71            }
72            self.n_pos[i] += self.dn[i];
73        }
74
75        self.adjust_heights();
76    }
77
78    fn adjust_heights(&mut self) {
79        for i in 1..4 {
80            let n = self.pos[i];
81            let np1 = self.pos[i + 1];
82            let nm1 = self.pos[i - 1];
83            let d = self.n_pos[i] - n as f64;
84
85            if (d >= 1.0 && np1 - n > 1) || (d <= -1.0 && nm1 - n < -1) {
86                let d = if d >= 0.0 { 1 } else { -1 };
87
88                // Only adjust if values are different
89                if d > 0 && self.heights[i + 1] != self.heights[i] {
90                    let delta = self.heights[i + 1] - self.heights[i];
91                    let np1 = self.pos[i + 1];
92                    self.heights[i] += delta / (np1 - n).max(1);
93                } else if d < 0 && self.heights[i] != self.heights[i - 1] {
94                    let delta = self.heights[i] - self.heights[i - 1];
95                    let nm1 = self.pos[i - 1];
96                    self.heights[i] -= delta / (n - nm1).max(1);
97                }
98
99                self.pos[i] += d;
100            }
101        }
102    }
103    pub fn value(&self) -> i64 {
104        if !self.filled {
105            return match self.count {
106                0 => 0,
107                1 => self.heights[0],
108                _ => {
109                    let mut sorted = self.heights[..self.count].to_vec();
110                    sorted.sort_unstable();
111                    let rank = (self.quantile * self.count as f64) as usize;
112                    sorted[rank]
113                }
114            };
115        }
116        self.heights[2]
117    }
118}
119
120impl Debug for P2 {
121    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
122        f.debug_struct("P2")
123            .field("quantile", &self.quantile)
124            .field("heights", &self.heights)
125            .field("pos", &self.pos)
126            .field("n_pos", &self.n_pos)
127            .field("dn", &self.dn)
128            .field("count", &self.count)
129            .field("filled", &self.filled)
130            .field("value", &self.value())
131            .finish()
132    }
133}
134
135#[derive(Debug, thiserror::Error)]
136pub enum Error {
137    #[error("Invalid quantile: {0}. Quantile must be in [0, 1]")]
138    InvalidQuantile(f64),
139}
140
141#[cfg(test)]
142mod test {
143    use rand::rngs::StdRng;
144    use rand::{Rng, SeedableRng};
145
146    use super::P2;
147
148    #[test]
149    fn test_p2() {
150        let mut data = (0..=100).collect::<Vec<_>>();
151        let epsilon = 1;
152
153        assert_percentile(&data, 0.25, epsilon);
154        assert_percentile(&data, 0.5, epsilon);
155        assert_percentile(&data, 0.75, epsilon);
156
157        data.reverse();
158
159        assert_percentile(&data, 0.25, epsilon);
160        assert_percentile(&data, 0.5, epsilon);
161        assert_percentile(&data, 0.75, epsilon);
162
163        let repeated_data = vec![42; 1000];
164        assert_percentile(&repeated_data, 0.25, 0);
165        assert_percentile(&repeated_data, 0.5, 0);
166        assert_percentile(&repeated_data, 0.75, 0);
167
168        let mut rand_data = StdRng::seed_from_u64(42)
169            .sample_iter(rand::distr::Uniform::new(0, 100).unwrap())
170            .take(1000)
171            .collect::<Vec<_>>();
172
173        assert_percentile(&rand_data, 0.25, 5);
174        assert_percentile(&rand_data, 0.5, 5);
175        assert_percentile(&rand_data, 0.75, 5);
176
177        rand_data.reverse();
178
179        assert_percentile(&rand_data, 0.25, 5);
180        assert_percentile(&rand_data, 0.5, 5);
181        assert_percentile(&rand_data, 0.75, 5);
182
183        rand_data.retain(|&x| x % 2 == 0);
184
185        assert_percentile(&rand_data, 0.25, 5);
186        assert_percentile(&rand_data, 0.5, 5);
187        assert_percentile(&rand_data, 0.75, 5);
188
189        rand_data.reverse();
190
191        assert_percentile(&rand_data, 0.25, 5);
192        assert_percentile(&rand_data, 0.5, 5);
193        assert_percentile(&rand_data, 0.75, 5);
194    }
195
196    fn assert_percentile(data: &[i64], percentile: f64, epsilon: i64) {
197        let expected = get_percentile(data, percentile);
198        let mut p2 = P2::new(percentile).unwrap();
199        for &d in data {
200            p2.append(d);
201        }
202        let res = p2.value();
203        assert!(
204            (res - expected).abs() <= epsilon,
205            "Expected {}th percentile to be close to {}, got {} (diff > {})",
206            percentile * 100.0,
207            expected,
208            res,
209            epsilon
210        );
211    }
212
213    fn get_percentile(numbers: &[i64], percentile: f64) -> i64 {
214        let mut sorted = numbers.to_vec();
215        sorted.sort_unstable();
216
217        let index = (percentile * (sorted.len() - 1) as f64).round() as usize;
218        sorted[index]
219    }
220}