Skip to main content

stats/
minmax.rs

1#![allow(clippy::cast_lossless)]
2use serde::{Deserialize, Serialize};
3use std::cmp::Ordering;
4use std::fmt;
5
6use crate::Commute;
7
8/// Represents the current sort order of the data.
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)]
10pub enum SortOrder {
11    Unsorted,
12    Ascending,
13    Descending,
14}
15
16/// A commutative data structure for tracking minimum and maximum values
17/// and detecting sort order in a stream of data.
18#[derive(Clone, Copy, Deserialize, Serialize, Eq, PartialEq)]
19pub struct MinMax<T> {
20    len: u32,
21    min: Option<T>,
22    max: Option<T>,
23    first_value: Option<T>,
24    last_value: Option<T>,
25    ascending_pairs: u32,
26    descending_pairs: u32,
27}
28
29impl<T: PartialOrd + Clone> MinMax<T> {
30    /// Create an empty state where min and max values do not exist.
31    #[must_use]
32    pub fn new() -> MinMax<T> {
33        Default::default()
34    }
35
36    /// Add a sample to the data and track min/max, the sort order & "sortiness".
37    #[inline]
38    pub fn add(&mut self, sample: T) {
39        match self.len {
40            // this comes first because it's the most common case
41            2.. => {
42                if let Some(ref last) = self.last_value {
43                    #[allow(clippy::match_same_arms)]
44                    match sample.partial_cmp(last) {
45                        Some(Ordering::Greater) => self.ascending_pairs += 1,
46                        Some(Ordering::Less) => self.descending_pairs += 1,
47                        // this comes last because it's the least common case
48                        Some(Ordering::Equal) => self.ascending_pairs += 1,
49                        None => {}
50                    }
51                }
52            }
53            0 => {
54                // first sample - clone for first_value and min, move to max
55                self.first_value = Some(sample.clone());
56                self.min = Some(sample.clone());
57                self.max = Some(sample);
58                self.len = 1;
59                return;
60            }
61            1 => {
62                // second sample
63                if let Some(ref first) = self.first_value {
64                    match sample.partial_cmp(first) {
65                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
66                        Some(Ordering::Less) => self.descending_pairs = 1,
67                        None => {}
68                    }
69                }
70            }
71        }
72
73        // Update min/max
74        if self.min.as_ref().is_none_or(|v| &sample < v) {
75            self.min = Some(sample.clone());
76        } else if self.max.as_ref().is_none_or(|v| &sample > v) {
77            self.max = Some(sample.clone());
78        }
79
80        // Update last value and number of samples
81        self.last_value = Some(sample);
82        self.len += 1;
83    }
84
85    /// Add a sample by reference, only cloning when necessary to update
86    /// min, max, first_value, or last_value.
87    ///
88    /// This is more efficient than `add()` when the caller has a reference
89    /// and most samples don't update min/max, because it avoids the upfront
90    /// allocation that `add()` requires from the caller.
91    ///
92    /// For `last_value`, the existing allocation is reused when possible
93    /// by clearing and cloning into it rather than replacing.
94    #[inline]
95    pub fn add_ref(&mut self, sample: &T) {
96        match self.len {
97            2.. => {
98                if let Some(ref last) = self.last_value {
99                    #[allow(clippy::match_same_arms)]
100                    match sample.partial_cmp(last) {
101                        Some(Ordering::Greater) => self.ascending_pairs += 1,
102                        Some(Ordering::Less) => self.descending_pairs += 1,
103                        Some(Ordering::Equal) => self.ascending_pairs += 1,
104                        None => {}
105                    }
106                }
107            }
108            0 => {
109                self.first_value = Some(sample.clone());
110                self.min = Some(sample.clone());
111                self.max = Some(sample.clone());
112                self.len = 1;
113                return;
114            }
115            1 => {
116                if let Some(ref first) = self.first_value {
117                    match sample.partial_cmp(first) {
118                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
119                        Some(Ordering::Less) => self.descending_pairs = 1,
120                        None => {}
121                    }
122                }
123            }
124        }
125
126        // Update min/max - only clone when actually updating
127        if self.min.as_ref().is_none_or(|v| sample < v) {
128            self.min = Some(sample.clone());
129        } else if self.max.as_ref().is_none_or(|v| sample > v) {
130            self.max = Some(sample.clone());
131        }
132
133        // Update last value - clone_from reuses existing allocation
134        if let Some(ref mut last) = self.last_value {
135            last.clone_from(sample);
136        } else {
137            self.last_value = Some(sample.clone());
138        }
139        self.len += 1;
140    }
141
142    /// Returns the minimum of the data set.
143    ///
144    /// `None` is returned if and only if the number of samples is `0`.
145    #[inline]
146    #[must_use]
147    pub const fn min(&self) -> Option<&T> {
148        self.min.as_ref()
149    }
150
151    /// Returns the maximum of the data set.
152    ///
153    /// `None` is returned if and only if the number of samples is `0`.
154    #[inline]
155    #[must_use]
156    pub const fn max(&self) -> Option<&T> {
157        self.max.as_ref()
158    }
159
160    /// Returns the number of data points.
161    #[inline]
162    #[must_use]
163    pub const fn len(&self) -> usize {
164        self.len as usize
165    }
166
167    /// Returns true if there are no data points.
168    #[inline]
169    #[must_use]
170    pub const fn is_empty(&self) -> bool {
171        self.len == 0
172    }
173
174    /// Returns the current sort order of the data.
175    #[inline]
176    #[must_use]
177    pub fn sort_order(&self) -> SortOrder {
178        let sortiness = self.sortiness();
179        // Use 1e-9 to handle floating point imprecision
180        // don't use f64::EPSILON because it's too small
181        if (sortiness - 1.0).abs() <= 1e-9 {
182            SortOrder::Ascending
183        } else if (sortiness + 1.0).abs() <= 1e-9 {
184            SortOrder::Descending
185        } else {
186            SortOrder::Unsorted
187        }
188    }
189
190    /// Calculates a "sortiness" score for the data, indicating how close it is to being sorted.
191    ///
192    /// Returns a value between -1.0 and 1.0:
193    /// * 1.0 indicates perfectly ascending order
194    /// * -1.0 indicates perfectly descending order
195    /// * Values in between indicate the general tendency towards ascending or descending order
196    /// * 0.0 indicates either no clear ordering or empty/single-element collections
197    ///
198    /// # Examples
199    /// ```
200    /// use stats::MinMax;
201    ///
202    /// let mut asc: MinMax<i32> = vec![1, 2, 3, 4, 5].into_iter().collect();
203    /// assert_eq!(asc.sortiness(), 1.0);
204    ///
205    /// let mut desc: MinMax<i32> = vec![5, 4, 3, 2, 1].into_iter().collect();
206    /// assert_eq!(desc.sortiness(), -1.0);
207    ///
208    /// let mut mostly_asc: MinMax<i32> = vec![1, 2, 4, 3, 5].into_iter().collect();
209    /// assert!(mostly_asc.sortiness() > 0.0); // Positive but less than 1.0
210    /// ```
211    #[inline]
212    #[must_use]
213    pub fn sortiness(&self) -> f64 {
214        if let 0 | 1 = self.len {
215            0.0
216        } else {
217            let total_pairs = self.ascending_pairs + self.descending_pairs;
218            if total_pairs == 0 {
219                0.0
220            } else {
221                (self.ascending_pairs as f64 - self.descending_pairs as f64) / total_pairs as f64
222            }
223        }
224    }
225}
226
227impl MinMax<Vec<u8>> {
228    /// Add a byte slice sample, avoiding heap allocation when the value doesn't
229    /// update min, max, or first_value. Only `last_value` is always updated
230    /// (reusing its existing allocation via `clone_from`).
231    ///
232    /// This is significantly more efficient than `add(sample.to_vec())` for large
233    /// datasets where most values don't update min/max — avoiding ~99% of
234    /// allocations in the common case.
235    #[inline]
236    pub fn add_bytes(&mut self, sample: &[u8]) {
237        match self.len {
238            2.. => {
239                if let Some(ref last) = self.last_value {
240                    #[allow(clippy::match_same_arms)]
241                    match sample.partial_cmp(last.as_slice()) {
242                        Some(Ordering::Greater) => self.ascending_pairs += 1,
243                        Some(Ordering::Less) => self.descending_pairs += 1,
244                        Some(Ordering::Equal) => self.ascending_pairs += 1,
245                        None => {}
246                    }
247                }
248            }
249            0 => {
250                let owned = sample.to_vec();
251                self.first_value = Some(owned.clone());
252                self.min = Some(owned.clone());
253                self.max = Some(owned);
254                self.len = 1;
255                return;
256            }
257            1 => {
258                if let Some(ref first) = self.first_value {
259                    match sample.partial_cmp(first.as_slice()) {
260                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
261                        Some(Ordering::Less) => self.descending_pairs = 1,
262                        None => {}
263                    }
264                }
265            }
266        }
267
268        // Update min/max - only allocate when actually updating
269        if self.min.as_ref().is_none_or(|v| sample < v.as_slice()) {
270            self.min = Some(sample.to_vec());
271        } else if self.max.as_ref().is_none_or(|v| sample > v.as_slice()) {
272            self.max = Some(sample.to_vec());
273        }
274
275        // Update last value - reuse existing allocation
276        if let Some(ref mut last) = self.last_value {
277            last.clear();
278            last.extend_from_slice(sample);
279        } else {
280            self.last_value = Some(sample.to_vec());
281        }
282        self.len += 1;
283    }
284}
285
286impl<T: PartialOrd + Clone> Commute for MinMax<T> {
287    #[inline]
288    fn merge(&mut self, v: MinMax<T>) {
289        self.len += v.len;
290        if self.min.is_none() || (v.min.is_some() && v.min < self.min) {
291            self.min = v.min;
292        }
293        if self.max.is_none() || (v.max.is_some() && v.max > self.max) {
294            self.max = v.max;
295        }
296
297        // Merge pair counts
298        self.ascending_pairs += v.ascending_pairs;
299        self.descending_pairs += v.descending_pairs;
300
301        // Handle merging of first_value and last_value
302        if self.len > 1 && v.len > 0 {
303            if self.first_value.is_none() {
304                self.first_value.clone_from(&v.first_value);
305            }
306            if let (Some(last), Some(v_first)) = (&self.last_value, &v.first_value) {
307                match v_first.partial_cmp(last) {
308                    Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs += 1,
309                    Some(Ordering::Less) => self.descending_pairs += 1,
310                    None => {}
311                }
312            }
313            self.last_value = v.last_value;
314        }
315    }
316}
317
318impl<T: PartialOrd> Default for MinMax<T> {
319    #[inline]
320    fn default() -> MinMax<T> {
321        MinMax {
322            len: 0,
323            min: None,
324            max: None,
325            first_value: None,
326            last_value: None,
327            ascending_pairs: 0,
328            descending_pairs: 0,
329        }
330    }
331}
332
333#[cfg(debug_assertions)]
334impl<T: fmt::Debug> fmt::Debug for MinMax<T> {
335    #[inline]
336    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
337        match (&self.min, &self.max) {
338            (Some(min), Some(max)) => {
339                let sort_status = if let 0 | 1 = self.len {
340                    SortOrder::Unsorted
341                } else {
342                    let total_pairs = self.ascending_pairs + self.descending_pairs;
343                    if total_pairs == 0 {
344                        SortOrder::Unsorted
345                    } else {
346                        let sortiness = (self.ascending_pairs as f64
347                            - self.descending_pairs as f64)
348                            / total_pairs as f64;
349                        match sortiness {
350                            1.0 => SortOrder::Ascending,
351                            -1.0 => SortOrder::Descending,
352                            _ => SortOrder::Unsorted,
353                        }
354                    }
355                };
356                write!(f, "[{min:?}, {max:?}], sort_order: {sort_status:?}")
357            }
358            (&None, &None) => write!(f, "N/A"),
359            _ => unreachable!(),
360        }
361    }
362}
363
364impl fmt::Display for SortOrder {
365    #[inline]
366    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367        match self {
368            SortOrder::Unsorted => write!(f, "Unsorted"),
369            SortOrder::Ascending => write!(f, "Ascending"),
370            SortOrder::Descending => write!(f, "Descending"),
371        }
372    }
373}
374
375impl<T: PartialOrd + Clone> FromIterator<T> for MinMax<T> {
376    #[inline]
377    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> MinMax<T> {
378        let mut v = MinMax::new();
379        v.extend(it);
380        v
381    }
382}
383
384impl<T: PartialOrd + Clone> Extend<T> for MinMax<T> {
385    #[inline]
386    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
387        for sample in it {
388            self.add(sample);
389        }
390    }
391}
392
393#[cfg(test)]
394mod test {
395    use super::{MinMax, SortOrder};
396    use crate::Commute;
397
398    #[test]
399    fn minmax() {
400        let minmax: MinMax<u32> = vec![1u32, 4, 2, 3, 10].into_iter().collect();
401        assert_eq!(minmax.min(), Some(&1u32));
402        assert_eq!(minmax.max(), Some(&10u32));
403        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
404    }
405
406    #[test]
407    fn minmax_sorted_ascending() {
408        let minmax: MinMax<u32> = vec![1u32, 2, 3, 4, 5].into_iter().collect();
409        assert_eq!(minmax.min(), Some(&1u32));
410        assert_eq!(minmax.max(), Some(&5u32));
411        assert_eq!(minmax.sort_order(), SortOrder::Ascending);
412    }
413
414    #[test]
415    fn minmax_sorted_descending() {
416        let minmax: MinMax<u32> = vec![5u32, 4, 3, 2, 1].into_iter().collect();
417        assert_eq!(minmax.min(), Some(&1u32));
418        assert_eq!(minmax.max(), Some(&5u32));
419        assert_eq!(minmax.sort_order(), SortOrder::Descending);
420    }
421
422    #[test]
423    fn minmax_empty() {
424        let minmax: MinMax<u32> = MinMax::new();
425        assert!(minmax.is_empty());
426        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
427    }
428
429    #[test]
430    fn minmax_merge_empty() {
431        let mut mx1: MinMax<u32> = vec![1, 4, 2, 3, 10].into_iter().collect();
432        assert_eq!(mx1.min(), Some(&1u32));
433        assert_eq!(mx1.max(), Some(&10u32));
434        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
435
436        mx1.merge(MinMax::default());
437        assert_eq!(mx1.min(), Some(&1u32));
438        assert_eq!(mx1.max(), Some(&10u32));
439        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
440    }
441
442    #[test]
443    fn minmax_merge_diffsorts() {
444        let mut mx1: MinMax<u32> = vec![1, 2, 2, 2, 3, 3, 4, 10].into_iter().collect();
445        assert_eq!(mx1.min(), Some(&1u32));
446        assert_eq!(mx1.max(), Some(&10u32));
447        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
448
449        let mx2: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
450        assert_eq!(mx2.min(), Some(&1u32));
451        assert_eq!(mx2.max(), Some(&5u32));
452        assert_eq!(mx2.sort_order(), SortOrder::Descending);
453        mx1.merge(mx2);
454        assert_eq!(mx1.min(), Some(&1u32));
455        assert_eq!(mx1.max(), Some(&10u32));
456        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
457    }
458
459    #[test]
460    fn minmax_merge_asc_sorts() {
461        let mut mx1: MinMax<u32> = vec![2, 2, 2, 5, 10].into_iter().collect();
462        assert_eq!(mx1.min(), Some(&2u32));
463        assert_eq!(mx1.max(), Some(&10u32));
464        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
465
466        let mx2: MinMax<u32> = vec![11, 14, 23, 32, 41].into_iter().collect();
467        assert_eq!(mx2.min(), Some(&11u32));
468        assert_eq!(mx2.max(), Some(&41u32));
469        assert_eq!(mx2.sort_order(), SortOrder::Ascending);
470        mx1.merge(mx2);
471        assert_eq!(mx1.min(), Some(&2u32));
472        assert_eq!(mx1.max(), Some(&41u32));
473        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
474    }
475
476    #[test]
477    fn test_sortiness() {
478        // Test empty
479        let minmax: MinMax<u32> = MinMax::new();
480        assert_eq!(minmax.sortiness(), 0.0);
481
482        // Test single element
483        let minmax: MinMax<u32> = vec![1].into_iter().collect();
484        assert_eq!(minmax.sortiness(), 0.0);
485
486        // Test perfectly ascending
487        let minmax: MinMax<u32> = vec![1, 2, 3, 4, 5].into_iter().collect();
488        assert_eq!(minmax.sortiness(), 1.0);
489
490        // Test perfectly descending
491        let minmax: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
492        assert_eq!(minmax.sortiness(), -1.0);
493
494        // Test all equal
495        let minmax: MinMax<u32> = vec![1, 1, 1, 1].into_iter().collect();
496        assert_eq!(minmax.sortiness(), 1.0); // Equal pairs are considered ascending
497
498        // Test mostly ascending
499        let minmax: MinMax<u32> = vec![1, 2, 4, 3, 5].into_iter().collect();
500        assert!(minmax.sortiness() > 0.0 && minmax.sortiness() < 1.0);
501        assert_eq!(minmax.sortiness(), 0.5); // 2 ascending pairs, 1 descending pair
502
503        // Test mostly descending
504        let minmax: MinMax<u32> = vec![5, 4, 3, 4, 2].into_iter().collect();
505        assert!(minmax.sortiness() < 0.0 && minmax.sortiness() > -1.0);
506        assert_eq!(minmax.sortiness(), -0.5); // 1 ascending pair, 3 descending pairs
507    }
508
509    #[test]
510    fn test_sortiness_merge() {
511        let mut mx1: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
512        let mx2: MinMax<u32> = vec![4, 5, 6].into_iter().collect();
513        assert_eq!(mx1.sortiness(), 1.0);
514        assert_eq!(mx2.sortiness(), 1.0);
515
516        mx1.merge(mx2);
517        assert_eq!(mx1.sortiness(), 1.0); // Should remain perfectly sorted after merge
518
519        let mut mx3: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
520        let mx4: MinMax<u32> = vec![2, 1, 0].into_iter().collect();
521        mx3.merge(mx4);
522        assert_eq!(mx3, vec![1, 2, 3, 2, 1, 0].into_iter().collect());
523        assert!(mx3.sortiness() < 1.0); // Should show mixed sorting after merge
524        assert_eq!(mx3.sortiness(), -0.2);
525    }
526}
527
528#[test]
529fn test_sortiness_simple_alphabetical() {
530    let minmax: MinMax<String> = vec![
531        "a".to_string(),
532        "b".to_string(),
533        "c".to_string(),
534        "d".to_string(),
535    ]
536    .into_iter()
537    .collect();
538    assert_eq!(minmax.sortiness(), 1.0);
539    assert_eq!(minmax.sort_order(), SortOrder::Ascending);
540
541    let minmax: MinMax<String> = vec![
542        "d".to_string(),
543        "c".to_string(),
544        "b".to_string(),
545        "a".to_string(),
546    ]
547    .into_iter()
548    .collect();
549    assert_eq!(minmax.sortiness(), -1.0);
550    assert_eq!(minmax.sort_order(), SortOrder::Descending);
551
552    let minmax: MinMax<String> = vec![
553        "a".to_string(),
554        "b".to_string(),
555        "c".to_string(),
556        "a".to_string(),
557    ]
558    .into_iter()
559    .collect();
560    assert_eq!(minmax.sortiness(), 0.3333333333333333);
561    assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
562}