online_statistics/
quantile.rs

1use crate::sorted_window::SortedWindow;
2use num::{Float, FromPrimitive, ToPrimitive};
3use std::ops::{AddAssign, SubAssign};
4
5use crate::stats::Univariate;
6use serde::{Deserialize, Serialize};
7/// Running quantile estimator using P-square Algorithm.
8/// # Arguments
9/// * `q` - quantile value. **WARNING** Should between `0` and `1`. Defaults to `0.5`.
10/// # Examples
11/// ```
12/// use online_statistics::quantile::Quantile;
13/// use online_statistics::stats::Univariate;
14/// let data = vec![9.,7.,3.,2.,6.,1., 8., 5., 4.];
15/// let mut running_quantile: Quantile<f64> = Quantile::default();
16/// for x in data.iter(){
17///     running_quantile.update(*x as f64);
18///     //println!("{}", running_quantile.get());
19///     running_quantile.get();
20/// }
21/// assert_eq!(running_quantile.get(), 5.0);
22/// ```
23/// # References
24/// [^1]: [The P² Algorithm for Dynamic Univariateal Computing Calculation of Quantiles and Editor Histograms Without Storing Observations](https://www.cse.wustl.edu/~jain/papers/ftp/psqr.pdf)
25///
26/// [^2]: [P² quantile estimator: estimating the median without storing values](https://aakinshin.net/posts/p2-quantile-estimator/)
27#[derive(Clone, Debug, Serialize, Deserialize)]
28pub struct Quantile<F: Float + FromPrimitive + AddAssign + SubAssign> {
29    q: F,
30    desired_marker_position: Vec<F>,
31    marker_position: Vec<F>,
32    position: Vec<F>,
33    heights: Vec<F>,
34    heights_sorted: bool,
35}
36impl<F: Float + FromPrimitive + AddAssign + SubAssign> Quantile<F> {
37    pub fn new(q: F) -> Result<Self, &'static str> {
38        if F::from_f64(0.).unwrap() > q && F::from_f64(1.).unwrap() < q {
39            return Err("q should be betweek 0 and 1");
40        }
41        Ok(Self {
42            q,
43            desired_marker_position: vec![
44                F::from_f64(0.).unwrap(),
45                q / F::from_f64(2.).unwrap(),
46                q,
47                (F::from_f64(1.).unwrap() + q) / F::from_f64(2.).unwrap(),
48                F::from_f64(1.).unwrap(),
49            ],
50            marker_position: vec![
51                F::from_f64(1.).unwrap(),
52                F::from_f64(1.).unwrap() + F::from_f64(2.).unwrap() * q,
53                F::from_f64(1.).unwrap() + F::from_f64(4.).unwrap() * q,
54                F::from_f64(3.).unwrap() + F::from_f64(2.).unwrap() * q,
55                F::from_f64(5.).unwrap(),
56            ],
57            position: (1..=5).map(|x| F::from_i32(x).unwrap()).collect(),
58            heights: Vec::new(),
59            heights_sorted: false,
60        })
61    }
62    fn find_k(&mut self, x: F) -> usize {
63        let mut k: Option<usize> = None;
64        if x < self.heights[0] {
65            self.heights[0] = x;
66            k = Some(1);
67        } else {
68            for i in 1..=4 {
69                if self.heights[i - 1] <= x && x < self.heights[i] {
70                    k = Some(i);
71                    break;
72                }
73            }
74            // If k is None it means that the previous loop did not break
75            if let (Some(last_height), None) = (self.heights.last_mut(), k) {
76                if *last_height < x {
77                    *last_height = x;
78                }
79            }
80        }
81        k.unwrap_or(4)
82    }
83    fn compute_p2(qp1: F, q: F, qm1: F, d: F, np1: F, n: F, nm1: F) -> F {
84        let outer = d / (np1 - nm1);
85        let inner_left = (n - nm1 + d) * (qp1 - q) / (np1 - n);
86        let inner_right = (np1 - n - d) * (q - qm1) / (n - nm1);
87        q + outer * (inner_left + inner_right)
88    }
89
90    fn adjust(&mut self) {
91        for i in 1..4 {
92            let n = self.position[i];
93            let q = self.heights[i];
94
95            let mut d = self.marker_position[i] - n;
96            if (d >= F::from_f64(1.0).unwrap()
97                && self.position[i + 1] - n > F::from_f64(1.0).unwrap())
98                || (d <= F::from_f64(-1.).unwrap()
99                    && self.position[i - 1] - n < F::from_f64(-1.).unwrap())
100            {
101                d = F::from_f64(1.).unwrap().copysign(d);
102                let qp1 = self.heights[i + 1];
103                let qm1 = self.heights[i - 1];
104                let np1 = self.position[i + 1];
105                let nm1 = self.position[i - 1];
106
107                let qn = Quantile::compute_p2(qp1, q, qm1, d, np1, n, nm1);
108
109                if qm1 < qn && qn < qp1 {
110                    self.heights[i] = qn;
111                } else {
112                    // d can be equals to -1 so we complete the operation in isize domain and go back to usize
113                    let linear_index = (i.to_isize().unwrap() + d.to_isize().unwrap())
114                        .to_usize()
115                        .unwrap();
116                    self.heights[i] = q + d * (self.heights[linear_index] - q)
117                        / (self.position[linear_index] - n);
118                }
119                self.position[i] = n + d;
120            }
121        }
122    }
123}
124
125impl<F> Default for Quantile<F>
126where
127    F: Float + FromPrimitive + AddAssign + SubAssign,
128{
129    fn default() -> Self {
130        let q = F::from_f64(0.5).unwrap();
131        Self {
132            q,
133            desired_marker_position: vec![
134                F::from_f64(0.).unwrap(),
135                q / F::from_f64(2.).unwrap(),
136                q,
137                (F::from_f64(1.).unwrap() + q) / F::from_f64(2.).unwrap(),
138                F::from_f64(1.).unwrap(),
139            ],
140            marker_position: vec![
141                F::from_f64(1.).unwrap(),
142                F::from_f64(1.).unwrap() + F::from_f64(2.).unwrap() * q,
143                F::from_f64(1.).unwrap() + F::from_f64(4.).unwrap() * q,
144                F::from_f64(3.).unwrap() + F::from_f64(2.).unwrap() * q,
145                F::from_f64(5.).unwrap(),
146            ],
147            position: (1..6).map(|x| F::from_i32(x).unwrap()).collect(),
148            heights: Vec::new(),
149            heights_sorted: false,
150        }
151    }
152}
153
154impl<F: Float + FromPrimitive + AddAssign + SubAssign> Univariate<F> for Quantile<F> {
155    fn update(&mut self, x: F) {
156        // Initialisation
157        if self.heights.len() != 5 {
158            self.heights.push(x);
159        } else {
160            if !self.heights_sorted {
161                self.heights.sort_by(|x, y| x.partial_cmp(y).unwrap());
162                self.heights_sorted = true;
163            }
164            // Find cell k such that qk < Xj <= qk+i and adjust extreme values (q1 and q) if necessary
165            let k = self.find_k(x);
166
167            // Increment all positions greater than k
168            for (index, value) in self.position.iter_mut().enumerate() {
169                if index >= k {
170                    *value += F::from_f64(1.0).unwrap();
171                }
172            }
173
174            for (marker, desired_marker) in self
175                .marker_position
176                .iter_mut()
177                .zip(self.desired_marker_position.iter())
178            {
179                *marker += *desired_marker;
180            }
181            self.adjust();
182            self.heights.sort_by(|x, y| x.partial_cmp(y).unwrap());
183        }
184    }
185    fn get(&self) -> F {
186        if self.heights_sorted {
187            self.heights[2]
188        } else {
189            let length = F::from_usize(self.heights.len()).unwrap();
190            let index = (length - F::from_f64(1.).unwrap())
191                .max(F::from_f64(0.).unwrap())
192                .min(length * self.q)
193                .to_usize()
194                .unwrap();
195            self.heights[index]
196        }
197    }
198}
199
200/// Rolling quantile.
201/// # Arguments
202/// * `q` - quantile value. **WARNING** Should between `0` and `1`.
203/// * `window_size` - Size of the rolling window.
204/// # Examples
205/// ```
206/// use online_statistics::quantile::RollingQuantile;
207/// use online_statistics::stats::Univariate;
208/// let mut rolling_quantile: RollingQuantile<f64> = RollingQuantile::new(0.5_f64, 101).unwrap();
209/// for i in 0..=100{
210///     rolling_quantile.update(i as f64);
211///     //println!("{}", rolling_quantile.get());
212///     rolling_quantile.get();
213/// }
214/// assert_eq!(rolling_quantile.get(), 50.0);
215/// ```
216///
217
218#[derive(Serialize, Deserialize)]
219pub struct RollingQuantile<F: Float + FromPrimitive + AddAssign + SubAssign> {
220    sorted_window: SortedWindow<F>,
221    q: F,
222    window_size: usize,
223    lower: usize,
224    higher: usize,
225    frac: F,
226}
227
228impl<F: Float + FromPrimitive + AddAssign + SubAssign> RollingQuantile<F> {
229    pub fn new(q: F, window_size: usize) -> Result<Self, &'static str> {
230        if F::from_f64(0.).unwrap() > q && F::from_f64(1.).unwrap() < q {
231            return Err("q should be betweek 0 and 1");
232        }
233        let idx = q * (F::from_usize(window_size).unwrap() - F::from_f64(1.).unwrap());
234        let lower = idx.floor().to_usize().unwrap();
235        let mut higher = lower + 1;
236        if higher > window_size - 1 {
237            higher = lower.saturating_sub(1); // Avoid attempt to subtract with overflow
238        }
239
240        let frac = idx - F::from_usize(lower).unwrap();
241        Ok(Self {
242            sorted_window: SortedWindow::new(window_size),
243            q,
244            window_size,
245            lower,
246            higher,
247            frac,
248        })
249    }
250    fn prepare(&self) -> (usize, usize, F) {
251        if self.sorted_window.len() < self.window_size {
252            let idx = self.q
253                * (F::from_usize(self.sorted_window.len()).unwrap() - F::from_f64(1.).unwrap());
254            let lower = idx.floor().to_usize().unwrap();
255            let mut higher = lower + 1;
256            if higher > self.sorted_window.len() - 1 {
257                higher = self.sorted_window.len().saturating_sub(1); // Avoid attempt to subtract with overflow
258            }
259
260            let frac = idx - F::from_usize(lower).unwrap();
261            return (lower, higher, frac);
262        }
263        (self.lower, self.higher, self.frac)
264    }
265}
266
267impl<F: Float + FromPrimitive + AddAssign + SubAssign> Univariate<F> for RollingQuantile<F> {
268    fn update(&mut self, x: F) {
269        self.sorted_window.push_back(x);
270    }
271    fn get(&self) -> F {
272        let (lower, higher, frac) = self.prepare();
273        self.sorted_window[lower] + (self.sorted_window[higher] - self.sorted_window[lower]) * frac
274    }
275}
276#[cfg(test)]
277mod test {
278    #[test]
279    fn rolling_quantile_edge_case() {
280        use crate::quantile::RollingQuantile;
281        use crate::stats::Univariate;
282        let mut rolling_quantile: RollingQuantile<f64> = RollingQuantile::new(1.0_f64, 1).unwrap();
283        for i in 0..=1000 {
284            rolling_quantile.update(i as f64);
285            //println!("{}", rolling_quantile.get());
286            rolling_quantile.get();
287        }
288        assert_eq!(rolling_quantile.get(), 1000.0);
289    }
290
291    #[test]
292    fn quantile_d_negative() {
293        use crate::quantile::Quantile;
294        use crate::stats::Univariate;
295        let data: Vec<f64> = vec![
296            10.557707193831535,
297            8.100043020890668,
298            9.100117273476478,
299            8.892842952595291,
300            10.94588485665605,
301            10.706797949691644,
302            11.568718270819382,
303            8.347755330517664,
304        ];
305        let mut quantile = Quantile::new(0.25_f64).unwrap();
306        for d in data.into_iter() {
307            quantile.update(d);
308        }
309    }
310}