1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2use std::collections::HashMap;
3use std::ops::{Deref, DerefMut};
4use std::{hash, ops};
5
6#[cfg(feature = "regression")]
7#[path = "regression.rs"]
8pub mod regression;
9
10pub mod percentile;
11
12#[cfg(feature = "percentile-rand")]
13pub use percentile::percentile_rand;
14pub use percentile::{median, percentile, Fraction};
15#[cfg(feature = "ols")]
16pub use regression::best_fit_ols as regression_best_fit;
17#[cfg(feature = "regression")]
18pub use regression::{Determination, Predictive};
19
20use self::percentile::cluster;
21
22pub type Cluster = (f64, usize);
30
31#[derive(Debug)]
36pub struct OwnedClusterList {
37 list: Vec<Cluster>,
38 len: usize,
39}
40impl OwnedClusterList {
41 pub fn new(list: Vec<Cluster>) -> Self {
43 let len = ClusterList::size(&list);
44 Self { list, len }
45 }
46 pub fn borrow(&self) -> ClusterList {
47 ClusterList {
48 list: &self.list,
49 len: self.len,
50 }
51 }
52}
53impl Deref for OwnedClusterList {
54 type Target = [Cluster];
55 fn deref(&self) -> &Self::Target {
56 &self.list
57 }
58}
59impl DerefMut for OwnedClusterList {
60 fn deref_mut(&mut self) -> &mut Self::Target {
61 &mut self.list
62 }
63}
64
65#[derive(Debug, Copy, Clone)]
71#[repr(transparent)]
72pub struct F64OrdHash(pub f64);
73impl F64OrdHash {
74 #[inline(always)]
75 fn key(&self) -> u64 {
76 self.0.to_bits()
77 }
78
79 #[inline(always)]
81 pub fn f64_cmp(a: f64, b: f64) -> std::cmp::Ordering {
82 Self(a).cmp(&Self(b))
83 }
84}
85impl hash::Hash for F64OrdHash {
86 #[inline]
87 fn hash<H>(&self, state: &mut H)
88 where
89 H: hash::Hasher,
90 {
91 self.key().hash(state)
92 }
93}
94impl PartialEq for F64OrdHash {
95 #[inline]
96 fn eq(&self, other: &F64OrdHash) -> bool {
97 self.key() == other.key()
98 }
99}
100impl Eq for F64OrdHash {}
101impl PartialOrd for F64OrdHash {
102 #[inline]
103 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
104 Some(self.cmp(other))
105 }
106}
107impl Ord for F64OrdHash {
108 #[inline]
109 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
110 self.0
111 .partial_cmp(&other.0)
112 .unwrap_or_else(|| match (self.0.is_nan(), other.0.is_nan()) {
113 (true, true) | (false, false) => std::cmp::Ordering::Equal,
114 (true, false) => std::cmp::Ordering::Less,
115 (false, true) => std::cmp::Ordering::Greater,
116 })
117 }
118}
119
120#[derive(Debug)]
126pub struct ClusterList<'a> {
127 list: &'a [Cluster],
128 len: usize,
129}
130impl<'a> ClusterList<'a> {
131 pub fn new(list: &'a [Cluster]) -> Self {
133 let len = Self::size(list);
134 Self { list, len }
135 }
136
137 fn size(list: &[Cluster]) -> usize {
138 list.iter().map(|(_, count)| *count).sum()
139 }
140
141 pub fn len(&self) -> usize {
143 self.len
144 }
145 pub fn is_empty(&self) -> bool {
147 self.list.is_empty()
148 }
149 pub fn sum(&self) -> f64 {
151 let mut sum = 0.0;
152 for (v, count) in self.list.iter() {
153 sum += v * *count as f64;
154 }
155 sum
156 }
157 fn sum_squared_diff(&self, base: f64) -> f64 {
158 let mut sum = 0.0;
159 for (v, count) in self.list.iter() {
160 sum += (v - base).powi(2) * *count as f64;
161 }
162 sum
163 }
164 pub fn split_start(&self, len: usize) -> OwnedClusterList {
166 let mut sum = 0;
167 let mut list = Vec::new();
168 for (v, count) in self.list {
169 sum += count;
170 if sum >= len {
171 list.push((*v, *count - (sum - len)));
172 break;
173 } else {
174 list.push((*v, *count));
175 }
176 }
177 debug_assert_eq!(len, Self::size(&list));
178 OwnedClusterList { list, len }
179 }
180 pub fn split_end(&self, len: usize) -> OwnedClusterList {
182 let mut sum = 0;
183 let mut list = Vec::new();
184 for (v, count) in self.list.iter().rev() {
185 sum += count;
186 if sum >= len {
187 list.insert(0, (*v, *count - (len - sum)));
188 break;
189 } else {
190 list.insert(0, (*v, *count))
191 }
192 }
193 debug_assert_eq!(len, Self::size(&list));
194 OwnedClusterList { list, len }
195 }
196 #[inline]
202 #[allow(clippy::should_implement_trait)] pub fn index(&self, mut idx: usize) -> &f64 {
204 for (v, c) in self.list {
205 let c = *c;
206 if idx < c {
207 return v;
208 }
209 idx -= c;
210 }
211 &self.list.last().unwrap().0
212 }
213
214 pub fn optimize_values(self) -> OwnedClusterList {
220 let mut collected = HashMap::with_capacity(16);
221 for (v, count) in self.list {
222 let c = collected.entry(F64OrdHash(*v)).or_insert(0);
223 *c += count;
224 }
225 let list = collected.into_iter().map(|(f, c)| (f.0, c)).collect();
226 OwnedClusterList {
227 list,
228 len: self.len,
229 }
230 }
231}
232
233#[derive(Debug, PartialEq, Eq, Clone, Copy)]
235pub struct StandardDeviationOutput<T> {
236 pub standard_deviation: T,
237 pub mean: T,
238}
239#[derive(Debug, PartialEq, Clone, Copy)]
241pub struct PercentilesOutput {
242 pub median: f64,
243 pub lower_quadrille: Option<f64>,
244 pub higher_quadrille: Option<f64>,
245}
246
247pub trait Mean<'a, D>: std::iter::Sum<&'a Self> + ops::Div<Output = D>
251where
252 Self: 'a,
253{
254 fn from_usize(n: usize) -> Self;
255}
256#[cfg(feature = "generic-impls")]
257impl<'a, T: std::iter::Sum<&'a Self> + ops::Div + num_traits::FromPrimitive> Mean<'a, T::Output>
258 for T
259where
260 T: 'a,
261{
262 fn from_usize(n: usize) -> Self {
263 Self::from_usize(n).expect("Value can not be converted from usize. Check your type in the call to standard_deviation/mean.")
264 }
265}
266#[cfg(not(feature = "generic-impls"))]
267macro_rules! impl_mean {
268 ($($t:ty, )+) => {
269 $(
270 impl<'a> Mean<'a, <$t as ops::Div>::Output> for $t {
271 fn from_usize(n: usize) -> Self {
272 n as _
273 }
274 }
275 )+
276 };
277}
278#[cfg(not(feature = "generic-impls"))]
279impl_mean!(f32, f64, u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize,);
280
281pub trait StandardDeviation<'a>:
285 Copy
286 + Mean<'a, Self>
287 + std::iter::Sum<&'a Self>
288 + std::iter::Sum
289 + ops::Div<Output = Self>
290 + ops::Sub<Output = Self>
291 + ops::Mul<Output = Self>
292where
293 Self: 'a,
294{
295 fn one() -> Self;
296 fn sqrt(self) -> Self;
297 fn max(self, other: Self) -> Self;
298}
299#[cfg(feature = "generic-impls")]
300impl<
301 'a,
302 T: Copy
303 + Mean<'a, Self>
304 + PartialOrd
305 + std::iter::Sum<&'a Self>
306 + std::iter::Sum
307 + ops::Div<Output = Self>
308 + ops::Sub<Output = Self>
309 + ops::Mul<Output = Self>
310 + num_traits::identities::One
311 + num_traits::real::Real,
312 > StandardDeviation<'a> for T
313where
314 T: 'a,
315{
316 fn one() -> Self {
317 Self::one()
318 }
319 fn sqrt(self) -> Self {
320 self.sqrt()
321 }
322 fn max(self, other: Self) -> Self {
323 if self < other {
324 other
325 } else {
326 self
327 }
328 }
329}
330#[cfg(not(feature = "generic-impls"))]
331macro_rules! impl_std_dev {
332 ($($t:ty, )+) => {
333 $(
334 impl<'a> StandardDeviation<'a> for $t {
335 fn one() -> Self {
336 1.0
337 }
338 fn sqrt(self) -> Self {
339 <$t>::sqrt(self)
340 }
341 fn max(self, other: Self) -> Self {
342 if self < other {
343 other
344 } else {
345 self
346 }
347 }
348 }
349 )+
350 };
351}
352#[cfg(not(feature = "generic-impls"))]
353impl_std_dev!(f32, f64,);
354
355pub fn mean_cluster(values: &ClusterList) -> f64 {
357 values.sum() / values.len() as f64
358}
359pub fn mean<'a, D, T: Mean<'a, D>>(values: &'a [T]) -> D {
361 values.iter().sum::<T>() / T::from_usize(values.len())
362}
363
364pub fn standard_deviation_cluster(values: &ClusterList) -> StandardDeviationOutput<f64> {
369 let m = mean_cluster(values);
370 let squared_deviations = values.sum_squared_diff(m);
371 let variance: f64 = squared_deviations / (values.len() - 1).max(1) as f64;
372 StandardDeviationOutput {
373 standard_deviation: variance.sqrt(),
374 mean: m,
375 }
376}
377pub fn standard_deviation<'a, T: StandardDeviation<'a>>(
382 values: &'a [T],
383) -> StandardDeviationOutput<T> {
384 let m = mean(values);
385 let squared_deviations: T = values
386 .iter()
387 .map(|t| {
388 let diff = *t - m;
389
390 diff * diff
391 })
392 .sum();
393 let len_minus_one = T::from_usize(values.len()) - T::one();
394 let denominator = len_minus_one.max(T::one());
396 let variance: T = squared_deviations / denominator;
397 let std_dev = variance.sqrt();
398
399 StandardDeviationOutput {
400 standard_deviation: std_dev,
401 mean: m,
402 }
403}
404
405pub fn percentiles_cluster(values: &mut OwnedClusterList) -> PercentilesOutput {
407 fn percentile(
408 values: &mut OwnedClusterList,
409 target: impl percentile::OrderedListIndex,
410 ) -> percentile::MeanValue<f64> {
411 #[cfg(feature = "percentile-rand")]
412 {
413 cluster::percentile_rand(values, target)
414 }
415 #[cfg(not(feature = "percentile-rand"))]
416 {
417 cluster::percentile(values, target, &mut cluster::pivot_fn::middle())
418 }
419 }
420 let lower = if values.borrow().len() >= 4 {
421 Some(percentile(values, Fraction::new(1, 4)).resolve())
422 } else {
423 None
424 };
425 let higher = if values.borrow().len() >= 4 {
426 Some(percentile(values, Fraction::new(3, 4)).resolve())
427 } else {
428 None
429 };
430 PercentilesOutput {
431 median: cluster::median(values).resolve(),
432 lower_quadrille: lower,
433 higher_quadrille: higher,
434 }
435}