1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
use num_traits::{Float, FromPrimitive, ToPrimitive};

use crate::arrayops::minmax;

pub fn percentile<T: Float + ToPrimitive>(values: &[T], percent: f64) -> T {
    let k = (values.len() - 1) as f64 * percent;
    let f = k.floor();
    let c = k.ceil();
    if (f - c).abs() < 1e-6 {
        return values[k as usize];
    }
    let d0 = values[f as usize] * T::from(c - k).unwrap();
    let d1 = values[c as usize] * T::from(k - f).unwrap();
    d0 + d1
}

pub fn freedman_diaconis_bin_width<T: Float + ToPrimitive>(values: &[T]) -> f64 {
    let q75 = percentile(values, 0.75);
    let q25 = percentile(values, 0.25);
    let iqr = (q75 - q25).to_f64().unwrap();
    2.0 * iqr * (values.len() as f64).powf(-1.0 / 3.0)
}

pub fn sturges_bin_width<T: Float + ToPrimitive>(values: &[T]) -> f64 {
    let d = (values.len() as f64 + 1.0).log2();
    let (min, max) = minmax(values);
    (max - min).to_f64().unwrap() / d
}

#[derive(Default, Debug, Clone)]
pub struct Histogram<T: Float + Default + FromPrimitive> {
    pub bin_count: Vec<usize>,
    pub bin_edges: Vec<T>,
}

impl<T: Float + Default + FromPrimitive> Histogram<T> {
    pub fn new(values: &[T], bins: usize) -> Histogram<T> {
        let mut hist = Histogram::default();
        hist.populate(values, bins);
        hist
    }

    pub fn clear(&mut self) {
        self.bin_count.clear();
        self.bin_edges.clear();
    }

    pub fn populate(self: &mut Histogram<T>, values: &[T], bins: usize) {
        let (mut min, mut max) = minmax(values);
        if min == max {
            min = min - T::from(0.5).unwrap();
            max = max + T::from(0.5).unwrap();
        }

        let binwidth = (max - min) / T::from(bins).unwrap();

        for i in 0..(bins + 1) {
            self.bin_edges.push(T::from(i).unwrap() * binwidth);
            if i < bins {
                self.bin_count.push(0);
            }
        }

        for x in values.iter() {
            let mut hit = false;
            for j in 1..bins + 1 {
                let binwidth = self.bin_edges[j];
                if x < &binwidth {
                    hit = true;
                    self.bin_count[j - 1] += 1;
                    break;
                }
            }

            if !hit {
                let j = self.bin_count.len() - 1;
                self.bin_count[j] += 1;
            }
        }
    }

    pub fn len(&self) -> usize {
        self.bin_count.len()
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}