1use super::bin::Bin;
2use super::Histogram;
3use std::cmp::Ordering;
4use probability::prelude::{Binomial, Distribution};
5use rand::prelude::*;
6
7
8pub enum QuantileType {
10 Type1,
12 Type7,
14}
15
16fn binomial_reduce_random(n: u64, pr: f64, tgt: f64) -> u64 {
17 if pr <= 0.0 { 0 }
18 else if pr >= 1.0 { n }
19 else {
20 let cumbin = Binomial::new(n as usize, pr);
21 let (mut left, mut right) = (0, n);
22 let mut i = (n as f64 * pr) as u64;
23 let mut bias = match (n as f64 * 0.005) as u64 {
24 x if x < 2 => 2,
25 x => x,
26 };
27 while left < right {
28 let s = i as f64;
29 let cum = cumbin.distribution(s);
30 if i == right {
31 if right - left == 1 {
32 if tgt <= cum {
33 return i;
34 }
35 else {
36 return i-1;
37 }
38 }
39 }
40 if tgt <= cum {
41 if i == 0 {
42 return 0;
43 }
44 right = i;
45 }
46 else {
47 if i == n {
48 return n;
49 }
50 else {
51 left = i;
52 }
53 }
54
55 let mut skip = (right - left) / bias;
56 if bias != 2 && skip < 1 {
57 skip = (right - left) / 2;
58 }
59 if skip < 1 {
60 skip = 1;
61 }
62
63 if tgt <= cum {
64 i = right - skip;
65 }
66 else {
67 i = left + skip;
68 }
69 bias = 2;
70 }
71 0
72 }
73}
74pub(crate) fn binomial_reduce(n: u64, pr: f64) -> u64 {
75 let mut rng = rand::rng();
76 let tgt = rng.random::<f64>();
77 binomial_reduce_random(n, pr, tgt)
78}
79
80impl Histogram {
81 fn accum_moments<const N: usize>(&self, ks: &[i32; N]) -> [f64; N] {
82 self.bvs.iter().fold([0.0; N], |v, (bin, count)| {
83 let midpoint = bin.midpoint();
84 let cardinality = *count as f64;
85 let mut newv = [0.0; N];
86 for i in 0..N {
87 newv[i] = v[i] + midpoint.powi(ks[i]) * cardinality;
88 }
89 newv
90 })
91 }
92 pub fn mean(&self) -> f64 {
94 let r = self.accum_moments(&[0, 1]);
95 if r[0] == 0.0 {
96 f64::NAN
97 } else {
98 r[1] / r[0]
99 }
100 }
101 pub fn sum(&self) -> f64 {
103 self.accum_moments(&[1])[0]
104 }
105 pub fn moment(&self, k: i32) -> f64 {
107 let r = self.accum_moments(&[0, k]);
108 if r[0] == 0.0 {
109 f64::NAN
110 } else {
111 r[1] / r[0].powi(k)
112 }
113 }
114 pub fn stddev(&self) -> f64 {
116 let r = self.accum_moments(&[0, 1, 2]);
117 if r[0] == 0.0 {
118 f64::NAN
119 } else {
120 (r[2] / r[0] - (r[1] / r[0]).powi(2)).sqrt()
121 }
122 }
123
124 pub fn count_relative(&self, pivot: &Bin, ord: Ordering, inclusive: bool) -> u128 {
126 assert_ne!(ord, Ordering::Equal);
127 self.bvs.iter().fold(0u128, |v, (bin, count)| {
128 let rel = bin.cmp(pivot);
129 if rel == Ordering::Equal {
130 if inclusive {
131 v + (*count as u128)
132 } else {
133 v
134 }
135 } else if rel == ord {
136 v + *count as u128
137 } else {
138 v
139 }
140 })
141 }
142 pub fn count_below_inclusive(&self, v: f64) -> u128 {
144 self.count_relative(&v.into(), Ordering::Less, true)
145 }
146 pub fn count_below_exclusive(&self, v: f64) -> u128 {
148 self.count_relative(&v.into(), Ordering::Less, false)
149 }
150 pub fn count_above_inclusive(&self, v: f64) -> u128 {
152 self.count_relative(&v.into(), Ordering::Greater, true)
153 }
154 pub fn count_above_exclusive(&self, v: f64) -> u128 {
156 self.count_relative(&v.into(), Ordering::Greater, false)
157 }
158 pub fn count_nearby(&self, v: f64) -> u64 {
160 self.bvs
161 .get(&v.into())
162 .and_then(|x| Some(*x))
163 .or(Some(0u64))
164 .unwrap()
165 }
166
167 pub fn quantile_ex<const N: usize>(
169 &self,
170 qs: &[f64; N],
171 qt: QuantileType,
172 ) -> Result<[f64; N], super::Error> {
173 let mut qout = [f64::NAN; N];
174 if N == 0 {
175 return Ok([0.0; N]);
176 }
177 for i in 1..N {
178 if qs[i - 1] > qs[i] {
179 return Err(super::Error::QuantilesOutOfOrder);
180 }
181 }
182 if qs[0] < 0.0 || qs[N - 1] > 1.0 {
183 return Err(super::Error::QuantileOutOfBounds);
184 }
185
186 match self.count() {
187 0 => Ok(qout),
188 cu128 => {
189 let total_count = cu128 as f64;
190 match qt {
192 QuantileType::Type1 => {
193 for i in 0..N {
194 qout[i] = total_count * qs[i]
195 }
196 }
197 QuantileType::Type7 => {
198 for i in 0..N {
199 qout[i] = ((total_count - 1.0) * qs[i] + 1.0).floor()
200 }
201 }
202 };
203
204 struct Track {
206 bin_width: f64,
207 bin_left: f64,
208 bin_count: f64,
209 lower_cnt: f64,
210 upper_cnt: f64,
211 }
212 impl Track {
213 fn update(&mut self, bin: &Bin, count: u64) {
214 self.bin_width = bin.width_signed();
215 self.bin_left = bin.left();
216 self.bin_count = count as f64;
217 self.lower_cnt = self.upper_cnt;
218 self.upper_cnt = self.lower_cnt + count as f64;
219 }
220 }
221 let mut track = Track {
222 bin_width: 0.0,
223 bin_left: 0.0,
224 bin_count: 0.0,
225 lower_cnt: 0.0,
226 upper_cnt: 0.0,
227 };
228 let mut bv_iter = self.bvs.iter();
230 if let Some((bin, count)) = bv_iter.next() {
231 track.update(bin, *count);
232 }
233
234 for i_q in 0..N {
235 while track.upper_cnt < qout[i_q] {
236 match bv_iter.next() {
237 Some((bin,count)) => track.update(bin, *count),
238 _ => break
239 }
240 }
241 if track.bin_width == 0.0 {
242 qout[i_q] = track.bin_left;
243 }
244 else {
245 match qt {
246 QuantileType::Type1 => {
255 let qn = qout[i_q];
256 assert!(qn >= track.lower_cnt);
257 assert!(qn <= track.upper_cnt);
258 let k = match (qn - track.lower_cnt).ceil() {
259 0.0 => 1.0,
260 k => k,
261 };
262 qout[i_q] = track.bin_left + k / (track.bin_count + 1.0) * track.bin_width;
263 },
264 QuantileType::Type7 => {
271 let k = qout[i_q] - track.lower_cnt;
272 qout[i_q] = track.bin_left + k / (track.bin_count + 1.0) * track.bin_width;
273 },
274 }
275 }
276 }
277 Ok(qout)
278 }
279 }
280 }
281 pub fn quantile1<const N: usize>(
283 &self,
284 qs: &[f64; N],
285 ) -> Result<[f64; N], super::Error> {
286 self.quantile_ex(qs, QuantileType::Type1)
287 }
288 pub fn quantile7<const N: usize>(
290 &self,
291 qs: &[f64; N],
292 ) -> Result<[f64; N], super::Error> {
293 self.quantile_ex(qs, QuantileType::Type7)
294 }
295 pub fn inverse_quantile<const N: usize>(
297 &self,
298 vs: &[f64; N],
299 ) -> Result<[f64; N], super::Error> {
300 let mut out = [f64::NAN; N];
301 if N == 0 {
302 return Ok(out);
303 }
304 for i in 1..N {
305 if vs[i] < vs[i-1] {
306 return Err(super::Error::QuantilesOutOfOrder);
307 }
308 }
309 let total_count = match self.count() {
310 0 => { return Ok(out); },
311 x => { x as f64 },
312 };
313 let mut vs_idx = 0usize;
314 let mut threshold = vs[vs_idx];
315 let mut count_below = 0u128;
316 for (bin, count) in self.bvs.iter() {
317 let (bin_lower, bin_upper, bin_width) = match bin.val {
318 v if v < 0i8 => {
319 let (b, w) = (bin.absolute_min(), bin.width());
320 (b-w, b, w)
321 },
322 v if v > 0i8 => {
323 let (b, w) = (bin.absolute_min(), bin.width());
324 (b, b+w, w)
325 },
326 _ => (-1e-128,1e-128,0.0),
327 };
328
329 while threshold < bin_lower {
330 out[vs_idx] = count_below as f64 / total_count as f64;
331 vs_idx += 1;
332 if vs_idx >= N {
333 return Ok(out);
334 }
335 threshold = vs[vs_idx];
336 }
337 while threshold < bin_upper {
338 out[vs_idx] = if bin_width > 0.0 {
339 let pr = (threshold - bin_lower) / bin_width;
340 (count_below as f64 + pr * (*count) as f64) / total_count as f64
341 } else {
342 count_below as f64 / total_count as f64
343 };
344 vs_idx += 1;
345 if vs_idx >= N {
346 return Ok(out);
347 }
348 threshold = vs[vs_idx];
349 }
350 count_below += *count as u128;
351 }
352 while vs_idx < N {
353 out[vs_idx] = 1.0;
354 vs_idx += 1;
355 }
356 Ok(out)
357 }
358}
359
360#[cfg(test)]
361mod tests {
362 use super::*;
363
364 #[test]
365 fn counts() {
366 let h = hist![
367 (-10, 2),
368 (-7, 3),
369 (-0.1, 5),
370 (0, 1),
371 (3),
372 (7, 14),
373 (f64::NAN)
374 ];
375 assert_eq!(h.count_including_nan(), 27);
376 assert_eq!(h.count(), 26);
377 assert_eq!(h.count_below_inclusive(-0.1), 10);
378 assert_eq!(h.count_above_inclusive(-0.1), 21);
379 assert_eq!(h.count_below_exclusive(-0.1), 5);
380 assert_eq!(h.count_above_exclusive(-0.1), 16);
381 assert_eq!(h.count_nearby(-0.1), 5);
382 assert_eq!(h.count_nearby(0.0), 1);
383 }
384 #[test]
385 fn quantiles() -> Result<(), super::super::Error> {
386 let h = hist![0.123, 0, 0.43, 0.41, 0.415, 0.2201, 0.3201, 0.125, 0.13];
387 let qs = [0.0,0.95,0.99,1.0];
388 assert_eq!(h.quantile1(&qs)?, [0.0, 0.435, 0.435, 0.435]);
389 assert_eq!(h.quantile7(&qs)?, [0.0, 0.41666666666666663, 0.41666666666666663, 0.435]);
390 assert_eq!(h.mean(), 0.2443387586038558);
391 assert_eq!(h.sum(), 2.199048827434702);
392
393 let h = hist![1,1];
394 let a = 1.0+0.1*1.0/3.0;
395 let b = 1.0+0.1*2.0/3.0;
396 let qs = [ 0.0, 0.25, 0.5, 0.75, 1.0 ];
397 assert_eq!(h.quantile_ex(&qs, QuantileType::Type1)?, [a,a,a,b,b]);
398 assert_eq!(h.quantile_ex(&qs, QuantileType::Type7)?, [a,a,a,a,b]);
399
400 let h = hist![1];
401 let qs = [ 0.0, 0.25, 0.5, 1.0 ];
402 assert_eq!(h.quantile1(&qs)?, [1.05, 1.05, 1.05, 1.05]);
403 assert_eq!(h.quantile7(&qs)?, [1.05, 1.05, 1.05, 1.05]);
404
405 let h = hist![1,1,1];
406 let qs = [ 0.0, 0.5, 1.0 ];
407 assert_eq!(h.quantile1(&qs)?, [1.025, 1.05, 1.075]);
408 assert_eq!(h.quantile7(&qs)?, [1.025, 1.05, 1.075]);
409
410 let h = hist![1.0, 2.0];
411 assert_eq!(h.quantile1(&[0.5])?, [1.05]);
412 assert_eq!(h.quantile7(&[0.5])?, [1.05]);
413
414 let h = hist![1.0, 1e200];
415 assert_eq!(h.quantile1(&[0.0,1.0])?, [1.05,1.05]);
416 assert_eq!(h.quantile7(&[0.0,1.0])?, [1.05,1.05]);
417 assert_eq!(h.mean(), 1.0476190476190477);
418
419 let h = hist![(1e200,3),(0.0,2),(1e-20,3),1e-10];
420 assert_eq!(h.quantile1(&[0.0,1.0])?, [0.0, 1.05e-10]);
421 assert_eq!(h.quantile7(&[0.0,1.0])?, [0.0, 1.05e-10]);
422
423 let h = hist![0,1];
424 let qs = [0.0,0.1,0.499,0.501,0.9,1.0];
425 assert_eq!(h.quantile1(&qs)?, [0.0,0.0,0.0,1.05,1.05,1.05]);
426 assert_eq!(h.quantile7(&qs)?, [0.0,0.0,0.0,0.0,0.0,1.05]);
427
428 let h = hist![10,100];
429 let qs = [0.0,0.1,0.499,0.501,0.9,1.0];
430 assert_eq!(h.quantile1(&qs)?, [10.5,10.5,10.5,105.0,105.0,105.0]);
431 assert_eq!(h.quantile7(&qs)?, [10.5,10.5,10.5,10.5,10.5,105.0]);
432 Ok(())
433 }
434
435 #[test]
436 fn inverse_quantiles() -> Result<(), super::super::Error> {
437 use approx::*;
438
439 let vs = [-200.0,-100.0,0.0,1.0,1.001,1.1,1.2,2.0,3.0,4.0];
440 let h = hist![];
441 let rq = h.inverse_quantile(&vs)?;
442 assert!(rq.iter().fold(true, |v, x| v && x.is_nan()));
443
444 let h = hist![(-100)];
445 assert_eq!([0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0], h.inverse_quantile(&vs)?);
446
447 let h = hist![0];
448 assert_eq!([0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0], h.inverse_quantile(&vs)?);
449
450 let h = hist![1,2,3];
451 [0.0,0.0,0.0,0.0,(1.0/3.0)/100.0,1.0/3.0,1.0/3.0,1.0/3.0,2.0/3.0,1.0]
452 .iter().zip(h.inverse_quantile(&vs)?
453 .iter()).for_each(|x| {
454 assert_relative_eq!(x.0, x.1, epsilon = 0.00000001);
455 });
456 Ok(())
457 }
458}