1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use crate::utilities::Classification;
use crate::utilities::{breaks_to_classification, to_vec_f64};
use num_traits::ToPrimitive;

/// Returns a Classification object following the Head-Tail Breaks algorithm given one-dimensional data
///
/// # Arguments
///
/// * `data` - A reference to a collection of unsorted data points to generate a Classification for
///
/// # Edge Cases
///
/// * Inputting large u64/i64 data (near their max values) will result in loss of precision because data is being cast to f64
///
/// # Examples
///
/// ```
/// use classify::get_head_tail_classification;
/// use classify::{Classification, Bin};
///
/// let data: Vec<f64> = vec![1.0/1.0, 1.0/2.0, 1.0/3.0, 1.0/4.0, 1.0/5.0,
///                           1.0/6.0, 1.0/7.0, 1.0/8.0, 1.0/9.0, 1.0/10.0];
///
/// let result: Classification = get_head_tail_classification(&data);
/// let expected: Classification = vec![
///     Bin{bin_start: 0.1, bin_end: 0.2928968253968254, count: 7},
///     Bin{bin_start: 0.2928968253968254, bin_end: 0.611111111111111, count: 2},
///     Bin{bin_start: 0.611111111111111, bin_end: 1.0, count: 1}
/// ];
///
/// assert!(result == expected);
/// ```
pub fn get_head_tail_classification<T: ToPrimitive>(data: &[T]) -> Classification {
    let breaks: Vec<f64> = get_head_tail_breaks(data);
    breaks_to_classification(&breaks, data)
}

/// Returns a vector of breaks generated through the Head-Tail Breaks algorithm given a dataset
///
/// # Arguments
///
/// * `data` - A reference to a collection of unsorted data points to generate breaks for
///
/// # Edge Cases
///
/// * Inputting large u64/i64 data (near their max values) will result in loss of precision because data is being cast to f64
///
/// # Examples
///
/// ```
/// use classify::get_head_tail_breaks;
///
/// let data: Vec<f64> = vec![1.0/1.0, 1.0/2.0, 1.0/3.0, 1.0/4.0, 1.0/5.0,
///                           1.0/6.0, 1.0/7.0, 1.0/8.0, 1.0/9.0, 1.0/10.0];
///
/// let result: Vec<f64> = get_head_tail_breaks(&data);
///
/// assert_eq!(result, vec![0.2928968253968254, 0.611111111111111]);
/// ```
pub fn get_head_tail_breaks<T: ToPrimitive>(data: &[T]) -> Vec<f64> {
    let data = to_vec_f64(data);

    let mut breaks: Vec<f64> = vec![];

    let mut sorted_data: Vec<f64> = vec![];
    for item in data.iter().take(data.len()) {
        sorted_data.push(*item);
    }
    sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
    head_tail_recursion(&sorted_data, &mut breaks);

    breaks
}

/// Recursive function used by get_head_tail_breaks that populates a vector of breaks according to the head-tail breaks algorithm
pub fn head_tail_recursion(data: &Vec<f64>, breaks: &mut Vec<f64>) {
    let mut mean: f64 = 0.0;
    for val in data {
        mean += val
    }
    mean /= data.len() as f64;

    breaks.push(mean);

    // Binary search to find first data point greater than or equal to the mean
    let mut low = 0;
    let mut high = data.len();
    let mut break_idx = 'outer: loop {
        let mid = (low + high) / 2;
        if mean < data[mid] as f64 {
            high = mid;
        } else if mean == data[mid] {
            break 'outer mid;
        } else if high - low <= 1 {
            break 'outer high;
        } else {
            low = mid;
        }
    };
    // Backtracking to the first occurrence of the target value
    while break_idx != 0 && data[break_idx] == data[break_idx - 1] {
        break_idx -= 1;
    }

    let head: Vec<f64> = data[break_idx..].to_vec();

    if !head.is_empty()
        && (head.len() as f64) / (data.len() as f64) <= 0.4
        && head[0] != head[head.len() - 1]
    {
        head_tail_recursion(&head, breaks);
    }
}