classif/
classif.rs

1use std::str::FromStr;
2use num_traits::{Float, NumAssignOps};
3
4use stats::mean;
5use jenks::get_jenks_breaks;
6
7#[derive(PartialEq, Debug)]
8/// The various type of classification methods availables.
9pub enum Classification {
10    EqualInterval,
11    HeadTail,
12    TailHead,
13    JenksNaturalBreaks,
14    Quantiles,
15    Arithmetic,
16}
17
18impl FromStr for Classification {
19    type Err = &'static str;
20
21    fn from_str(s: &str) -> Result<Self, Self::Err> {
22        match s {
23            "JenksNaturalBreaks" => Ok(Classification::JenksNaturalBreaks),
24            "Quantiles" => Ok(Classification::Quantiles),
25            "EqualInverval" => Ok(Classification::EqualInterval),
26            "HeadTail" => Ok(Classification::HeadTail),
27            "TailHead" => Ok(Classification::TailHead),
28            "Arithmetic" => Ok(Classification::Arithmetic),
29            _ => Err("Invalid classification name"),
30        }
31    }
32}
33
34/// A struct containing the bounds computed at its creation and some basic
35/// statistical informations : minimum, maximum and mean value.
36///
37/// The main field of `BoundsInfo` struct is the `bounds` field: a `Vec<T>`
38/// containing the bounds. The lower bound is the minimum of the input values
39/// and the upper bound is the maximum.
40///
41/// The [`get_class_index`] method allows to retrieve the index of the cluster in which
42/// belongs an input value.
43///
44/// ### Example :
45///
46/// ```rust
47/// extern crate classif;
48/// extern crate num_traits;
49///
50/// let values = vec![1.0, 1.3, 2.4, 5.0, 2.1, 5.3, 4.0, 3.0, 1.3, 4.3, 6.0, 2.1];
51/// let bounds_info = classif::BoundsInfo::new(4, &values, classif::Classification::EqualInterval).unwrap();
52/// // The first bounds value is the minimum:
53/// assert_eq!(*bounds_info.bounds.first().unwrap(), bounds_info.min);
54/// // And the last bounds value is the maximum:
55/// assert_eq!(*bounds_info.bounds.last().unwrap(), bounds_info.max);
56/// // So for 4 class we need a vector of 5 values :
57/// assert_eq!(bounds_info.bounds.len(), 5);
58/// // In which class belong the value 4.4 ?
59/// let ix = bounds_info.get_class_index(4.4).unwrap();
60/// ```
61/// [`get_class_index`]: struct.BoundsInfo.html#method.get_class_index
62pub struct BoundsInfo<T> {
63    pub type_classif: Classification,
64    pub nb_class: u32,
65    pub bounds: Vec<T>,
66    pub min: T,
67    pub max: T,
68    pub mean: T,
69}
70
71impl<T> BoundsInfo<T>
72    where T: Float + NumAssignOps
73{
74    pub fn new(nb_class: u32,
75               values: &[T],
76               type_classif: Classification)
77               -> Result<Self, &'static str> {
78        let nb_elem = values.len();
79        if nb_elem < 2 {
80            return Err("Too small number of values!".into());
81        } else if !(type_classif == Classification::HeadTail ||
82                    type_classif == Classification::TailHead) &&
83                  (nb_class < 2 || nb_class > nb_elem as u32) {
84            return Err("Invalid number of class");
85        }
86        let mut v = values.to_vec();
87        v.sort_by(|a, b| a.partial_cmp(b).unwrap());
88        let breaks = match type_classif {
89            Classification::JenksNaturalBreaks => get_jenks_breaks(&v, nb_class),
90            Classification::Quantiles => get_quantiles(&v, nb_class),
91            Classification::EqualInterval => get_equal_interval(&v, nb_class),
92            Classification::HeadTail => get_head_tail_breaks(&v),
93            Classification::TailHead => get_tail_head_breaks(&v),
94            Classification::Arithmetic => get_arithmetic_breaks(&v, nb_class),
95        };
96        Ok(BoundsInfo {
97               type_classif: type_classif,
98               nb_class: (breaks.len() - 1) as u32,
99               bounds: breaks,
100               min: v[0],
101               max: v[v.len() - 1],
102               mean: mean(&v),
103           })
104    }
105
106    /// Returns the index of the class to which the `value` belongs, wrapped
107    /// in an Option. Returns None if the value is outside the serie range.
108    pub fn get_class_index(&self, value: T) -> Option<u32> {
109        for i in 0..self.bounds.len() - 1 {
110            if value <= self.bounds[i + 1usize] && value >= self.bounds[i] {
111                return Some(i as u32);
112            }
113        }
114        None
115    }
116}
117
118/// Compute the equal interval breaks on a list of sorted values.
119pub fn get_equal_interval<T>(sorted_values: &[T], nb_class: u32) -> Vec<T>
120    where T: Float + NumAssignOps
121{
122    // values.sort_by(|a, b| a.partial_cmp(b).unwrap());
123    // let nb_elem = values.len();
124    let min = sorted_values.first().unwrap();
125    let max = sorted_values.last().unwrap();
126    let interval = (*max - *min) / T::from(nb_class).unwrap();
127    let mut breaks = Vec::new();
128    let mut val = *min;
129    for _ in 0..(nb_class + 1) {
130        breaks.push(val);
131        val += interval;
132    }
133    {
134        let last = breaks.last_mut().unwrap();
135        *last = *max;
136    }
137    breaks
138}
139
140/// Compute the quantiles breaks on a list of sorted values.
141pub fn get_quantiles<T>(sorted_values: &[T], nb_class: u32) -> Vec<T>
142    where T: Float
143{
144    let nb_elem: usize = sorted_values.len();
145    let mut breaks = Vec::new();
146    breaks.push(sorted_values[0]);
147    let step = nb_elem as f64 / nb_class as f64;
148    for i in 1..nb_class {
149        let qidx = (i as f64 * step + 0.49).floor() as usize;
150        breaks.push(sorted_values[qidx - 1]);
151    }
152    breaks.push(sorted_values[nb_elem - 1]);
153    breaks
154}
155
156/// Compute the "Head-Tail" breaks on a list of sorted values
157/// (to be used on heavily right skewed distributions).
158pub fn get_head_tail_breaks<T>(sorted_values: &[T]) -> Vec<T>
159    where T: Float + NumAssignOps
160{
161    let mut _mean = mean(&sorted_values);
162    let mut breaks = Vec::new();
163    let mut t;
164    breaks.push(sorted_values[0]);
165    loop {
166        t = sorted_values
167            .iter()
168            .filter(|&v| *v > _mean)
169            .cloned()
170            .collect::<Vec<T>>();
171        _mean = mean(&t);
172        breaks.push(_mean);
173        if t.len() < 2 {
174            break;
175        }
176    }
177    breaks
178}
179
180/// Compute the "Tail-Head" breaks on a list of sorted values
181/// (its actually just the inverse of the Head-Tail method,
182/// to be used on heavily left skewed distributions).
183pub fn get_tail_head_breaks<T>(sorted_values: &[T]) -> Vec<T>
184    where T: Float + NumAssignOps
185{
186    let mut _mean = mean(&sorted_values);
187    let mut breaks = Vec::new();
188    let mut t;
189    breaks.push(*sorted_values.last().unwrap());
190    loop {
191        t = sorted_values
192            .iter()
193            .filter(|&v| *v < _mean)
194            .cloned()
195            .collect::<Vec<T>>();
196        _mean = mean(&t);
197        breaks.push(_mean);
198        if t.len() < 2 {
199            break;
200        }
201    }
202    breaks.reverse();
203    breaks
204}
205
206/// Compute the "arithmetic progression" breaks on a list of sorted values.
207pub fn get_arithmetic_breaks<T>(sorted_values: &[T], nb_class: u32) -> Vec<T>
208    where T: Float + NumAssignOps
209{
210    let mut denominator = T::zero();
211    for i in 1..nb_class + 1 {
212        denominator += T::from(i).unwrap();
213    }
214    let mut breaks = Vec::new();
215    let tmp_min = sorted_values[0];
216    let tmp_max = sorted_values[sorted_values.len() - 1];
217    let interval = (tmp_max - tmp_min) / denominator;
218    breaks.push(tmp_min);
219    for i in 1..nb_class + 1 {
220        let v = breaks[(i - 1) as usize];
221        breaks.push(v + (T::from(i).unwrap() * interval));
222    }
223    breaks
224}