1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use crateClassification;
use crate;
use ToPrimitive;
/// Returns a `Classification` produced by the Quantile Breaks algorithm, given the desired number of bins and one-dimensional data
///
/// # Arguments
///
/// * `num_bins` - An integer (usize) representing the desired number of bins
/// * `data` - A reference to a collection of unsorted data points to generate a Classification for
///
/// # Edge Cases
///
/// * Large u64/i64 values (near the maximum of their type) lose precision because the data is cast to f64
/// * The number of bins generated by this algorithm cannot exceed the number of unique values in the dataset
/// * If the dataset contains many duplicate values, the number of bins produced may differ from `num_bins` because duplicate breaks are removed
///
/// # Examples
///
/// ```
/// use classify::get_quantile_classification;
/// use classify::{Classification, Bin};
///
/// let data: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
/// let num_bins = 3;
///
/// let result: Classification = get_quantile_classification(num_bins, &data);
/// let expected: Classification = vec![
///     Bin{bin_start: 1.0, bin_end: 3.5, count: 3},
///     Bin{bin_start: 3.5, bin_end: 6.5, count: 3},
///     Bin{bin_start: 6.5, bin_end: 9.0, count: 3}
/// ];
///
/// assert!(result == expected);
/// ```
/// Returns a vector of breaks generated through the Quantile Breaks algorithm given the desired number of bins and a dataset
///
/// # Arguments
///
/// * `num_bins` - The desired number of bins
/// * `data` - A reference to a collection of unsorted data points to generate breaks for
///
/// # Edge Cases
///
/// * Large u64/i64 values (near the maximum of their type) lose precision because the data is cast to f64
/// * The number of bins generated by this algorithm cannot exceed the number of unique values in the dataset
/// * If the dataset contains many duplicate values, the number of bins produced may differ from `num_bins` because duplicate breaks are removed
///
/// # Examples
///
/// ```
/// use classify::get_quantile_breaks;
///
/// let data: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
/// let num_bins = 3;
///
/// let result: Vec<f64> = get_quantile_breaks(num_bins, &data);
///
/// assert_eq!(result, vec![3.5, 6.5]);
/// ```