stats/
minmax.rs

1#![allow(clippy::cast_lossless)]
2use serde::{Deserialize, Serialize};
3use std::cmp::Ordering;
4use std::fmt;
5
6use crate::Commute;
7
8/// Represents the current sort order of the data.
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)]
10pub enum SortOrder {
11    Unsorted,
12    Ascending,
13    Descending,
14}
15
16/// A commutative data structure for tracking minimum and maximum values
17/// and detecting sort order in a stream of data.
18#[derive(Clone, Copy, Deserialize, Serialize, Eq, PartialEq)]
19pub struct MinMax<T> {
20    len: u32,
21    min: Option<T>,
22    max: Option<T>,
23    first_value: Option<T>,
24    last_value: Option<T>,
25    ascending_pairs: u32,
26    descending_pairs: u32,
27}
28
29impl<T: PartialOrd + Clone> MinMax<T> {
30    /// Create an empty state where min and max values do not exist.
31    #[must_use]
32    pub fn new() -> MinMax<T> {
33        Default::default()
34    }
35
36    /// Add a sample to the data and track min/max, the sort order & "sortiness".
37    #[inline]
38    pub fn add(&mut self, sample: T) {
39        match self.len {
40            // this comes first because it's the most common case
41            2.. => {
42                if let Some(ref last) = self.last_value {
43                    #[allow(clippy::match_same_arms)]
44                    match sample.partial_cmp(last) {
45                        Some(Ordering::Greater) => self.ascending_pairs += 1,
46                        Some(Ordering::Less) => self.descending_pairs += 1,
47                        // this comes last because it's the least common case
48                        Some(Ordering::Equal) => self.ascending_pairs += 1,
49                        None => {}
50                    }
51                }
52            }
53            0 => {
54                // first sample - clone for first_value and min, move to max
55                self.first_value = Some(sample.clone());
56                self.min = Some(sample.clone());
57                self.max = Some(sample);
58                self.len = 1;
59                return;
60            }
61            1 => {
62                // second sample
63                if let Some(ref first) = self.first_value {
64                    match sample.partial_cmp(first) {
65                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
66                        Some(Ordering::Less) => self.descending_pairs = 1,
67                        None => {}
68                    }
69                }
70            }
71        }
72
73        // Update min/max
74        if self.min.as_ref().is_none_or(|v| &sample < v) {
75            self.min = Some(sample.clone());
76        } else if self.max.as_ref().is_none_or(|v| &sample > v) {
77            self.max = Some(sample.clone());
78        }
79
80        // Update last value and number of samples
81        self.last_value = Some(sample);
82        self.len += 1;
83    }
84
85    /// Returns the minimum of the data set.
86    ///
87    /// `None` is returned if and only if the number of samples is `0`.
88    #[inline]
89    #[must_use]
90    pub const fn min(&self) -> Option<&T> {
91        self.min.as_ref()
92    }
93
94    /// Returns the maximum of the data set.
95    ///
96    /// `None` is returned if and only if the number of samples is `0`.
97    #[inline]
98    #[must_use]
99    pub const fn max(&self) -> Option<&T> {
100        self.max.as_ref()
101    }
102
103    /// Returns the number of data points.
104    #[inline]
105    #[must_use]
106    pub const fn len(&self) -> usize {
107        self.len as usize
108    }
109
110    /// Returns true if there are no data points.
111    #[inline]
112    #[must_use]
113    pub const fn is_empty(&self) -> bool {
114        self.len == 0
115    }
116
117    /// Returns the current sort order of the data.
118    #[inline]
119    #[must_use]
120    pub fn sort_order(&self) -> SortOrder {
121        let sortiness = self.sortiness();
122        // Use 1e-9 to handle floating point imprecision
123        // don't use f64::EPSILON because it's too small
124        if (sortiness - 1.0).abs() <= 1e-9 {
125            SortOrder::Ascending
126        } else if (sortiness + 1.0).abs() <= 1e-9 {
127            SortOrder::Descending
128        } else {
129            SortOrder::Unsorted
130        }
131    }
132
133    /// Calculates a "sortiness" score for the data, indicating how close it is to being sorted.
134    ///
135    /// Returns a value between -1.0 and 1.0:
136    /// * 1.0 indicates perfectly ascending order
137    /// * -1.0 indicates perfectly descending order
138    /// * Values in between indicate the general tendency towards ascending or descending order
139    /// * 0.0 indicates either no clear ordering or empty/single-element collections
140    ///
141    /// # Examples
142    /// ```
143    /// use stats::MinMax;
144    ///
145    /// let mut asc: MinMax<i32> = vec![1, 2, 3, 4, 5].into_iter().collect();
146    /// assert_eq!(asc.sortiness(), 1.0);
147    ///
148    /// let mut desc: MinMax<i32> = vec![5, 4, 3, 2, 1].into_iter().collect();
149    /// assert_eq!(desc.sortiness(), -1.0);
150    ///
151    /// let mut mostly_asc: MinMax<i32> = vec![1, 2, 4, 3, 5].into_iter().collect();
152    /// assert!(mostly_asc.sortiness() > 0.0); // Positive but less than 1.0
153    /// ```
154    #[inline]
155    #[must_use]
156    pub fn sortiness(&self) -> f64 {
157        if let 0 | 1 = self.len {
158            0.0
159        } else {
160            let total_pairs = self.ascending_pairs + self.descending_pairs;
161            if total_pairs == 0 {
162                0.0
163            } else {
164                (self.ascending_pairs as f64 - self.descending_pairs as f64) / total_pairs as f64
165            }
166        }
167    }
168}
169
170impl<T: PartialOrd + Clone> Commute for MinMax<T> {
171    #[inline]
172    fn merge(&mut self, v: MinMax<T>) {
173        self.len += v.len;
174        if self.min.is_none() || (v.min.is_some() && v.min < self.min) {
175            self.min = v.min;
176        }
177        if self.max.is_none() || (v.max.is_some() && v.max > self.max) {
178            self.max = v.max;
179        }
180
181        // Merge pair counts
182        self.ascending_pairs += v.ascending_pairs;
183        self.descending_pairs += v.descending_pairs;
184
185        // Handle merging of first_value and last_value
186        if self.len > 1 && v.len > 0 {
187            if self.first_value.is_none() {
188                self.first_value.clone_from(&v.first_value);
189            }
190            if let (Some(last), Some(v_first)) = (&self.last_value, &v.first_value) {
191                match v_first.partial_cmp(last) {
192                    Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs += 1,
193                    Some(Ordering::Less) => self.descending_pairs += 1,
194                    None => {}
195                }
196            }
197            self.last_value = v.last_value;
198        }
199    }
200}
201
202impl<T: PartialOrd> Default for MinMax<T> {
203    #[inline]
204    fn default() -> MinMax<T> {
205        MinMax {
206            len: 0,
207            min: None,
208            max: None,
209            first_value: None,
210            last_value: None,
211            ascending_pairs: 0,
212            descending_pairs: 0,
213        }
214    }
215}
216
217#[cfg(debug_assertions)]
218impl<T: fmt::Debug> fmt::Debug for MinMax<T> {
219    #[inline]
220    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221        match (&self.min, &self.max) {
222            (Some(min), Some(max)) => {
223                let sort_status = if let 0 | 1 = self.len {
224                    SortOrder::Unsorted
225                } else {
226                    let total_pairs = self.ascending_pairs + self.descending_pairs;
227                    if total_pairs == 0 {
228                        SortOrder::Unsorted
229                    } else {
230                        let sortiness = (self.ascending_pairs as f64
231                            - self.descending_pairs as f64)
232                            / total_pairs as f64;
233                        match sortiness {
234                            1.0 => SortOrder::Ascending,
235                            -1.0 => SortOrder::Descending,
236                            _ => SortOrder::Unsorted,
237                        }
238                    }
239                };
240                write!(f, "[{min:?}, {max:?}], sort_order: {sort_status:?}")
241            }
242            (&None, &None) => write!(f, "N/A"),
243            _ => unreachable!(),
244        }
245    }
246}
247
248impl fmt::Display for SortOrder {
249    #[inline]
250    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
251        match self {
252            SortOrder::Unsorted => write!(f, "Unsorted"),
253            SortOrder::Ascending => write!(f, "Ascending"),
254            SortOrder::Descending => write!(f, "Descending"),
255        }
256    }
257}
258
259impl<T: PartialOrd + Clone> FromIterator<T> for MinMax<T> {
260    #[inline]
261    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> MinMax<T> {
262        let mut v = MinMax::new();
263        v.extend(it);
264        v
265    }
266}
267
268impl<T: PartialOrd + Clone> Extend<T> for MinMax<T> {
269    #[inline]
270    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
271        for sample in it {
272            self.add(sample);
273        }
274    }
275}
276
277#[cfg(test)]
278mod test {
279    use super::{MinMax, SortOrder};
280    use crate::Commute;
281
282    #[test]
283    fn minmax() {
284        let minmax: MinMax<u32> = vec![1u32, 4, 2, 3, 10].into_iter().collect();
285        assert_eq!(minmax.min(), Some(&1u32));
286        assert_eq!(minmax.max(), Some(&10u32));
287        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
288    }
289
290    #[test]
291    fn minmax_sorted_ascending() {
292        let minmax: MinMax<u32> = vec![1u32, 2, 3, 4, 5].into_iter().collect();
293        assert_eq!(minmax.min(), Some(&1u32));
294        assert_eq!(minmax.max(), Some(&5u32));
295        assert_eq!(minmax.sort_order(), SortOrder::Ascending);
296    }
297
298    #[test]
299    fn minmax_sorted_descending() {
300        let minmax: MinMax<u32> = vec![5u32, 4, 3, 2, 1].into_iter().collect();
301        assert_eq!(minmax.min(), Some(&1u32));
302        assert_eq!(minmax.max(), Some(&5u32));
303        assert_eq!(minmax.sort_order(), SortOrder::Descending);
304    }
305
306    #[test]
307    fn minmax_empty() {
308        let minmax: MinMax<u32> = MinMax::new();
309        assert!(minmax.is_empty());
310        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
311    }
312
313    #[test]
314    fn minmax_merge_empty() {
315        let mut mx1: MinMax<u32> = vec![1, 4, 2, 3, 10].into_iter().collect();
316        assert_eq!(mx1.min(), Some(&1u32));
317        assert_eq!(mx1.max(), Some(&10u32));
318        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
319
320        mx1.merge(MinMax::default());
321        assert_eq!(mx1.min(), Some(&1u32));
322        assert_eq!(mx1.max(), Some(&10u32));
323        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
324    }
325
326    #[test]
327    fn minmax_merge_diffsorts() {
328        let mut mx1: MinMax<u32> = vec![1, 2, 2, 2, 3, 3, 4, 10].into_iter().collect();
329        assert_eq!(mx1.min(), Some(&1u32));
330        assert_eq!(mx1.max(), Some(&10u32));
331        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
332
333        let mx2: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
334        assert_eq!(mx2.min(), Some(&1u32));
335        assert_eq!(mx2.max(), Some(&5u32));
336        assert_eq!(mx2.sort_order(), SortOrder::Descending);
337        mx1.merge(mx2);
338        assert_eq!(mx1.min(), Some(&1u32));
339        assert_eq!(mx1.max(), Some(&10u32));
340        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
341    }
342
343    #[test]
344    fn minmax_merge_asc_sorts() {
345        let mut mx1: MinMax<u32> = vec![2, 2, 2, 5, 10].into_iter().collect();
346        assert_eq!(mx1.min(), Some(&2u32));
347        assert_eq!(mx1.max(), Some(&10u32));
348        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
349
350        let mx2: MinMax<u32> = vec![11, 14, 23, 32, 41].into_iter().collect();
351        assert_eq!(mx2.min(), Some(&11u32));
352        assert_eq!(mx2.max(), Some(&41u32));
353        assert_eq!(mx2.sort_order(), SortOrder::Ascending);
354        mx1.merge(mx2);
355        assert_eq!(mx1.min(), Some(&2u32));
356        assert_eq!(mx1.max(), Some(&41u32));
357        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
358    }
359
360    #[test]
361    fn test_sortiness() {
362        // Test empty
363        let minmax: MinMax<u32> = MinMax::new();
364        assert_eq!(minmax.sortiness(), 0.0);
365
366        // Test single element
367        let minmax: MinMax<u32> = vec![1].into_iter().collect();
368        assert_eq!(minmax.sortiness(), 0.0);
369
370        // Test perfectly ascending
371        let minmax: MinMax<u32> = vec![1, 2, 3, 4, 5].into_iter().collect();
372        assert_eq!(minmax.sortiness(), 1.0);
373
374        // Test perfectly descending
375        let minmax: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
376        assert_eq!(minmax.sortiness(), -1.0);
377
378        // Test all equal
379        let minmax: MinMax<u32> = vec![1, 1, 1, 1].into_iter().collect();
380        assert_eq!(minmax.sortiness(), 1.0); // Equal pairs are considered ascending
381
382        // Test mostly ascending
383        let minmax: MinMax<u32> = vec![1, 2, 4, 3, 5].into_iter().collect();
384        assert!(minmax.sortiness() > 0.0 && minmax.sortiness() < 1.0);
385        assert_eq!(minmax.sortiness(), 0.5); // 2 ascending pairs, 1 descending pair
386
387        // Test mostly descending
388        let minmax: MinMax<u32> = vec![5, 4, 3, 4, 2].into_iter().collect();
389        assert!(minmax.sortiness() < 0.0 && minmax.sortiness() > -1.0);
390        assert_eq!(minmax.sortiness(), -0.5); // 1 ascending pair, 3 descending pairs
391    }
392
393    #[test]
394    fn test_sortiness_merge() {
395        let mut mx1: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
396        let mx2: MinMax<u32> = vec![4, 5, 6].into_iter().collect();
397        assert_eq!(mx1.sortiness(), 1.0);
398        assert_eq!(mx2.sortiness(), 1.0);
399
400        mx1.merge(mx2);
401        assert_eq!(mx1.sortiness(), 1.0); // Should remain perfectly sorted after merge
402
403        let mut mx3: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
404        let mx4: MinMax<u32> = vec![2, 1, 0].into_iter().collect();
405        mx3.merge(mx4);
406        assert_eq!(mx3, vec![1, 2, 3, 2, 1, 0].into_iter().collect());
407        assert!(mx3.sortiness() < 1.0); // Should show mixed sorting after merge
408        assert_eq!(mx3.sortiness(), -0.2);
409    }
410}
411
412#[test]
413fn test_sortiness_simple_alphabetical() {
414    let minmax: MinMax<String> = vec![
415        "a".to_string(),
416        "b".to_string(),
417        "c".to_string(),
418        "d".to_string(),
419    ]
420    .into_iter()
421    .collect();
422    assert_eq!(minmax.sortiness(), 1.0);
423    assert_eq!(minmax.sort_order(), SortOrder::Ascending);
424
425    let minmax: MinMax<String> = vec![
426        "d".to_string(),
427        "c".to_string(),
428        "b".to_string(),
429        "a".to_string(),
430    ]
431    .into_iter()
432    .collect();
433    assert_eq!(minmax.sortiness(), -1.0);
434    assert_eq!(minmax.sort_order(), SortOrder::Descending);
435
436    let minmax: MinMax<String> = vec![
437        "a".to_string(),
438        "b".to_string(),
439        "c".to_string(),
440        "a".to_string(),
441    ]
442    .into_iter()
443    .collect();
444    assert_eq!(minmax.sortiness(), 0.3333333333333333);
445    assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
446}