Skip to main content

stats/
minmax.rs

1#![allow(clippy::cast_lossless)]
2use serde::{Deserialize, Serialize};
3use std::cmp::Ordering;
4use std::fmt;
5
6use crate::Commute;
7
8/// Represents the current sort order of the data.
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)]
10pub enum SortOrder {
11    Unsorted,
12    Ascending,
13    Descending,
14}
15
16/// A commutative data structure for tracking minimum and maximum values
17/// and detecting sort order in a stream of data.
18#[derive(Clone, Copy, Deserialize, Serialize, Eq, PartialEq)]
19pub struct MinMax<T> {
20    // Hot fields: accessed on every add() call, grouped on same cache line
21    len: u32,
22    ascending_pairs: u32,
23    descending_pairs: u32,
24    // Warm fields: accessed conditionally
25    min: Option<T>,
26    max: Option<T>,
27    first_value: Option<T>,
28    last_value: Option<T>,
29}
30
31impl<T: PartialOrd + Clone> MinMax<T> {
32    /// Create an empty state where min and max values do not exist.
33    #[must_use]
34    pub fn new() -> MinMax<T> {
35        Default::default()
36    }
37
38    /// Add a sample to the data and track min/max, the sort order & "sortiness".
39    #[inline]
40    pub fn add(&mut self, sample: T) {
41        match self.len {
42            // this comes first because it's the most common case
43            2.. => {
44                if let Some(ref last) = self.last_value {
45                    #[allow(clippy::match_same_arms)]
46                    match sample.partial_cmp(last) {
47                        Some(Ordering::Greater) => self.ascending_pairs += 1,
48                        Some(Ordering::Less) => self.descending_pairs += 1,
49                        // this comes last because it's the least common case
50                        Some(Ordering::Equal) => self.ascending_pairs += 1,
51                        None => {}
52                    }
53                }
54            }
55            0 => {
56                // first sample - clone for first_value and min, move to max
57                self.first_value = Some(sample.clone());
58                self.min = Some(sample.clone());
59                self.max = Some(sample);
60                self.len = 1;
61                return;
62            }
63            1 => {
64                // second sample
65                if let Some(ref first) = self.first_value {
66                    match sample.partial_cmp(first) {
67                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
68                        Some(Ordering::Less) => self.descending_pairs = 1,
69                        None => {}
70                    }
71                }
72            }
73        }
74
75        // Update min/max
76        if self.min.as_ref().is_none_or(|v| &sample < v) {
77            self.min = Some(sample.clone());
78        } else if self.max.as_ref().is_none_or(|v| &sample > v) {
79            self.max = Some(sample.clone());
80        }
81
82        // Update last value and number of samples
83        self.last_value = Some(sample);
84        self.len += 1;
85    }
86
87    /// Add a sample by reference, only cloning when necessary to update
88    /// min, max, `first_value`, or `last_value`.
89    ///
90    /// This is more efficient than `add()` when the caller has a reference
91    /// and most samples don't update min/max, because it avoids the upfront
92    /// allocation that `add()` requires from the caller.
93    ///
94    /// For `last_value`, the existing allocation is reused when possible
95    /// by clearing and cloning into it rather than replacing.
96    #[inline]
97    pub fn add_ref(&mut self, sample: &T) {
98        match self.len {
99            2.. => {
100                if let Some(ref last) = self.last_value {
101                    #[allow(clippy::match_same_arms)]
102                    match sample.partial_cmp(last) {
103                        Some(Ordering::Greater) => self.ascending_pairs += 1,
104                        Some(Ordering::Less) => self.descending_pairs += 1,
105                        Some(Ordering::Equal) => self.ascending_pairs += 1,
106                        None => {}
107                    }
108                }
109            }
110            0 => {
111                self.first_value = Some(sample.clone());
112                self.min = Some(sample.clone());
113                self.max = Some(sample.clone());
114                self.len = 1;
115                return;
116            }
117            1 => {
118                if let Some(ref first) = self.first_value {
119                    match sample.partial_cmp(first) {
120                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
121                        Some(Ordering::Less) => self.descending_pairs = 1,
122                        None => {}
123                    }
124                }
125            }
126        }
127
128        // Update min/max - only clone when actually updating
129        if self.min.as_ref().is_none_or(|v| sample < v) {
130            self.min = Some(sample.clone());
131        } else if self.max.as_ref().is_none_or(|v| sample > v) {
132            self.max = Some(sample.clone());
133        }
134
135        // Update last value - clone_from reuses existing allocation
136        if let Some(ref mut last) = self.last_value {
137            last.clone_from(sample);
138        } else {
139            self.last_value = Some(sample.clone());
140        }
141        self.len += 1;
142    }
143
144    /// Returns the minimum of the data set.
145    ///
146    /// `None` is returned if and only if the number of samples is `0`.
147    #[inline]
148    #[must_use]
149    pub const fn min(&self) -> Option<&T> {
150        self.min.as_ref()
151    }
152
153    /// Returns the maximum of the data set.
154    ///
155    /// `None` is returned if and only if the number of samples is `0`.
156    #[inline]
157    #[must_use]
158    pub const fn max(&self) -> Option<&T> {
159        self.max.as_ref()
160    }
161
162    /// Returns the number of data points.
163    #[inline]
164    #[must_use]
165    pub const fn len(&self) -> usize {
166        self.len as usize
167    }
168
169    /// Returns true if there are no data points.
170    #[inline]
171    #[must_use]
172    pub const fn is_empty(&self) -> bool {
173        self.len == 0
174    }
175
176    /// Returns the current sort order of the data.
177    #[inline]
178    #[must_use]
179    pub fn sort_order(&self) -> SortOrder {
180        let sortiness = self.sortiness();
181        // Use 1e-9 to handle floating point imprecision
182        // don't use f64::EPSILON because it's too small
183        if (sortiness - 1.0).abs() <= 1e-9 {
184            SortOrder::Ascending
185        } else if (sortiness + 1.0).abs() <= 1e-9 {
186            SortOrder::Descending
187        } else {
188            SortOrder::Unsorted
189        }
190    }
191
192    /// Calculates a "sortiness" score for the data, indicating how close it is to being sorted.
193    ///
194    /// Returns a value between -1.0 and 1.0:
195    /// * 1.0 indicates perfectly ascending order
196    /// * -1.0 indicates perfectly descending order
197    /// * Values in between indicate the general tendency towards ascending or descending order
198    /// * 0.0 indicates either no clear ordering or empty/single-element collections
199    ///
200    /// # Examples
201    /// ```
202    /// use stats::MinMax;
203    ///
204    /// let mut asc: MinMax<i32> = vec![1, 2, 3, 4, 5].into_iter().collect();
205    /// assert_eq!(asc.sortiness(), 1.0);
206    ///
207    /// let mut desc: MinMax<i32> = vec![5, 4, 3, 2, 1].into_iter().collect();
208    /// assert_eq!(desc.sortiness(), -1.0);
209    ///
210    /// let mut mostly_asc: MinMax<i32> = vec![1, 2, 4, 3, 5].into_iter().collect();
211    /// assert!(mostly_asc.sortiness() > 0.0); // Positive but less than 1.0
212    /// ```
213    #[inline]
214    #[must_use]
215    pub fn sortiness(&self) -> f64 {
216        if let 0 | 1 = self.len {
217            0.0
218        } else {
219            let total_pairs = self.ascending_pairs + self.descending_pairs;
220            if total_pairs == 0 {
221                0.0
222            } else {
223                (self.ascending_pairs as f64 - self.descending_pairs as f64) / total_pairs as f64
224            }
225        }
226    }
227}
228
229impl MinMax<Vec<u8>> {
230    /// Add a byte slice sample, avoiding heap allocation when the value doesn't
231    /// update min, max, or `first_value`. Only `last_value` is always updated
232    /// (reusing its existing allocation via `clone_from`).
233    ///
234    /// This is significantly more efficient than `add(sample.to_vec())` for large
235    /// datasets where most values don't update min/max — avoiding ~99% of
236    /// allocations in the common case.
237    #[inline]
238    pub fn add_bytes(&mut self, sample: &[u8]) {
239        match self.len {
240            2.. => {
241                if let Some(ref last) = self.last_value {
242                    #[allow(clippy::match_same_arms)]
243                    match sample.partial_cmp(last.as_slice()) {
244                        Some(Ordering::Greater) => self.ascending_pairs += 1,
245                        Some(Ordering::Less) => self.descending_pairs += 1,
246                        Some(Ordering::Equal) => self.ascending_pairs += 1,
247                        None => {}
248                    }
249                }
250            }
251            0 => {
252                let owned = sample.to_vec();
253                self.first_value = Some(owned.clone());
254                self.min = Some(owned.clone());
255                self.max = Some(owned);
256                self.len = 1;
257                return;
258            }
259            1 => {
260                if let Some(ref first) = self.first_value {
261                    match sample.partial_cmp(first.as_slice()) {
262                        Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs = 1,
263                        Some(Ordering::Less) => self.descending_pairs = 1,
264                        None => {}
265                    }
266                }
267            }
268        }
269
270        // Update min/max - only allocate when actually updating
271        if self.min.as_ref().is_none_or(|v| sample < v.as_slice()) {
272            self.min = Some(sample.to_vec());
273        } else if self.max.as_ref().is_none_or(|v| sample > v.as_slice()) {
274            self.max = Some(sample.to_vec());
275        }
276
277        // Update last value - reuse existing allocation
278        if let Some(ref mut last) = self.last_value {
279            last.clear();
280            last.extend_from_slice(sample);
281        } else {
282            self.last_value = Some(sample.to_vec());
283        }
284        self.len += 1;
285    }
286}
287
288impl<T: PartialOrd + Clone> Commute for MinMax<T> {
289    #[inline]
290    fn merge(&mut self, v: MinMax<T>) {
291        if v.min.is_none() {
292            return;
293        }
294        self.len += v.len;
295        if self.min.is_none() || v.min < self.min {
296            self.min = v.min;
297        }
298        if self.max.is_none() || v.max > self.max {
299            self.max = v.max;
300        }
301
302        // Merge pair counts
303        self.ascending_pairs += v.ascending_pairs;
304        self.descending_pairs += v.descending_pairs;
305
306        // Handle merging of first_value and last_value
307        if self.first_value.is_none() {
308            self.first_value.clone_from(&v.first_value);
309        }
310        if v.len > 0 {
311            if let (Some(last), Some(v_first)) = (&self.last_value, &v.first_value) {
312                match v_first.partial_cmp(last) {
313                    Some(Ordering::Greater | Ordering::Equal) => self.ascending_pairs += 1,
314                    Some(Ordering::Less) => self.descending_pairs += 1,
315                    None => {}
316                }
317            }
318            self.last_value = v.last_value;
319        }
320    }
321}
322
323impl<T: PartialOrd> Default for MinMax<T> {
324    #[inline]
325    fn default() -> MinMax<T> {
326        MinMax {
327            len: 0,
328            ascending_pairs: 0,
329            descending_pairs: 0,
330            min: None,
331            max: None,
332            first_value: None,
333            last_value: None,
334        }
335    }
336}
337
338impl<T: fmt::Debug> fmt::Debug for MinMax<T> {
339    #[inline]
340    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
341        match (&self.min, &self.max) {
342            (Some(min), Some(max)) => {
343                let sort_status = if let 0 | 1 = self.len {
344                    SortOrder::Unsorted
345                } else {
346                    let total_pairs = self.ascending_pairs + self.descending_pairs;
347                    if total_pairs == 0 {
348                        SortOrder::Unsorted
349                    } else {
350                        let sortiness = (self.ascending_pairs as f64
351                            - self.descending_pairs as f64)
352                            / total_pairs as f64;
353                        match sortiness {
354                            1.0 => SortOrder::Ascending,
355                            -1.0 => SortOrder::Descending,
356                            _ => SortOrder::Unsorted,
357                        }
358                    }
359                };
360                write!(f, "[{min:?}, {max:?}], sort_order: {sort_status:?}")
361            }
362            (&None, &None) => write!(f, "N/A"),
363            _ => unreachable!(),
364        }
365    }
366}
367
368impl fmt::Display for SortOrder {
369    #[inline]
370    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
371        match self {
372            SortOrder::Unsorted => write!(f, "Unsorted"),
373            SortOrder::Ascending => write!(f, "Ascending"),
374            SortOrder::Descending => write!(f, "Descending"),
375        }
376    }
377}
378
379impl<T: PartialOrd + Clone> FromIterator<T> for MinMax<T> {
380    #[inline]
381    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> MinMax<T> {
382        let mut v = MinMax::new();
383        v.extend(it);
384        v
385    }
386}
387
388impl<T: PartialOrd + Clone> Extend<T> for MinMax<T> {
389    #[inline]
390    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
391        for sample in it {
392            self.add(sample);
393        }
394    }
395}
396
397#[cfg(test)]
398mod test {
399    use super::{MinMax, SortOrder};
400    use crate::Commute;
401
402    #[test]
403    fn minmax() {
404        let minmax: MinMax<u32> = vec![1u32, 4, 2, 3, 10].into_iter().collect();
405        assert_eq!(minmax.min(), Some(&1u32));
406        assert_eq!(minmax.max(), Some(&10u32));
407        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
408    }
409
410    #[test]
411    fn minmax_sorted_ascending() {
412        let minmax: MinMax<u32> = vec![1u32, 2, 3, 4, 5].into_iter().collect();
413        assert_eq!(minmax.min(), Some(&1u32));
414        assert_eq!(minmax.max(), Some(&5u32));
415        assert_eq!(minmax.sort_order(), SortOrder::Ascending);
416    }
417
418    #[test]
419    fn minmax_sorted_descending() {
420        let minmax: MinMax<u32> = vec![5u32, 4, 3, 2, 1].into_iter().collect();
421        assert_eq!(minmax.min(), Some(&1u32));
422        assert_eq!(minmax.max(), Some(&5u32));
423        assert_eq!(minmax.sort_order(), SortOrder::Descending);
424    }
425
426    #[test]
427    fn minmax_empty() {
428        let minmax: MinMax<u32> = MinMax::new();
429        assert!(minmax.is_empty());
430        assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
431    }
432
433    #[test]
434    fn minmax_merge_empty() {
435        let mut mx1: MinMax<u32> = vec![1, 4, 2, 3, 10].into_iter().collect();
436        assert_eq!(mx1.min(), Some(&1u32));
437        assert_eq!(mx1.max(), Some(&10u32));
438        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
439
440        mx1.merge(MinMax::default());
441        assert_eq!(mx1.min(), Some(&1u32));
442        assert_eq!(mx1.max(), Some(&10u32));
443        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
444    }
445
446    #[test]
447    fn minmax_merge_diffsorts() {
448        let mut mx1: MinMax<u32> = vec![1, 2, 2, 2, 3, 3, 4, 10].into_iter().collect();
449        assert_eq!(mx1.min(), Some(&1u32));
450        assert_eq!(mx1.max(), Some(&10u32));
451        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
452
453        let mx2: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
454        assert_eq!(mx2.min(), Some(&1u32));
455        assert_eq!(mx2.max(), Some(&5u32));
456        assert_eq!(mx2.sort_order(), SortOrder::Descending);
457        mx1.merge(mx2);
458        assert_eq!(mx1.min(), Some(&1u32));
459        assert_eq!(mx1.max(), Some(&10u32));
460        assert_eq!(mx1.sort_order(), SortOrder::Unsorted);
461    }
462
463    #[test]
464    fn minmax_merge_asc_sorts() {
465        let mut mx1: MinMax<u32> = vec![2, 2, 2, 5, 10].into_iter().collect();
466        assert_eq!(mx1.min(), Some(&2u32));
467        assert_eq!(mx1.max(), Some(&10u32));
468        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
469
470        let mx2: MinMax<u32> = vec![11, 14, 23, 32, 41].into_iter().collect();
471        assert_eq!(mx2.min(), Some(&11u32));
472        assert_eq!(mx2.max(), Some(&41u32));
473        assert_eq!(mx2.sort_order(), SortOrder::Ascending);
474        mx1.merge(mx2);
475        assert_eq!(mx1.min(), Some(&2u32));
476        assert_eq!(mx1.max(), Some(&41u32));
477        assert_eq!(mx1.sort_order(), SortOrder::Ascending);
478    }
479
480    #[test]
481    fn test_sortiness() {
482        // Test empty
483        let minmax: MinMax<u32> = MinMax::new();
484        assert_eq!(minmax.sortiness(), 0.0);
485
486        // Test single element
487        let minmax: MinMax<u32> = vec![1].into_iter().collect();
488        assert_eq!(minmax.sortiness(), 0.0);
489
490        // Test perfectly ascending
491        let minmax: MinMax<u32> = vec![1, 2, 3, 4, 5].into_iter().collect();
492        assert_eq!(minmax.sortiness(), 1.0);
493
494        // Test perfectly descending
495        let minmax: MinMax<u32> = vec![5, 4, 3, 2, 1].into_iter().collect();
496        assert_eq!(minmax.sortiness(), -1.0);
497
498        // Test all equal
499        let minmax: MinMax<u32> = vec![1, 1, 1, 1].into_iter().collect();
500        assert_eq!(minmax.sortiness(), 1.0); // Equal pairs are considered ascending
501
502        // Test mostly ascending
503        let minmax: MinMax<u32> = vec![1, 2, 4, 3, 5].into_iter().collect();
504        assert!(minmax.sortiness() > 0.0 && minmax.sortiness() < 1.0);
505        assert_eq!(minmax.sortiness(), 0.5); // 2 ascending pairs, 1 descending pair
506
507        // Test mostly descending
508        let minmax: MinMax<u32> = vec![5, 4, 3, 4, 2].into_iter().collect();
509        assert!(minmax.sortiness() < 0.0 && minmax.sortiness() > -1.0);
510        assert_eq!(minmax.sortiness(), -0.5); // 1 ascending pair, 3 descending pairs
511    }
512
513    #[test]
514    fn test_sortiness_merge() {
515        let mut mx1: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
516        let mx2: MinMax<u32> = vec![4, 5, 6].into_iter().collect();
517        assert_eq!(mx1.sortiness(), 1.0);
518        assert_eq!(mx2.sortiness(), 1.0);
519
520        mx1.merge(mx2);
521        assert_eq!(mx1.sortiness(), 1.0); // Should remain perfectly sorted after merge
522
523        let mut mx3: MinMax<u32> = vec![1, 2, 3].into_iter().collect();
524        let mx4: MinMax<u32> = vec![2, 1, 0].into_iter().collect();
525        mx3.merge(mx4);
526        assert_eq!(mx3, vec![1, 2, 3, 2, 1, 0].into_iter().collect());
527        assert!(mx3.sortiness() < 1.0); // Should show mixed sorting after merge
528        assert_eq!(mx3.sortiness(), -0.2);
529    }
530
531    #[test]
532    fn test_merge_single_into_empty() {
533        let mut empty: MinMax<u32> = MinMax::default();
534        let single: MinMax<u32> = vec![42].into_iter().collect();
535
536        assert!(empty.first_value.is_none());
537        assert!(empty.last_value.is_none());
538
539        empty.merge(single);
540
541        assert_eq!(empty.len(), 1);
542        assert_eq!(empty.min(), Some(&42));
543        assert_eq!(empty.max(), Some(&42));
544        assert_eq!(empty.first_value, Some(42));
545        // last_value is None for single-element MinMax (only set from 2nd element onward)
546        assert_eq!(empty.last_value, None);
547    }
548}
549
550#[test]
551fn test_sortiness_simple_alphabetical() {
552    let minmax: MinMax<String> = vec![
553        "a".to_string(),
554        "b".to_string(),
555        "c".to_string(),
556        "d".to_string(),
557    ]
558    .into_iter()
559    .collect();
560    assert_eq!(minmax.sortiness(), 1.0);
561    assert_eq!(minmax.sort_order(), SortOrder::Ascending);
562
563    let minmax: MinMax<String> = vec![
564        "d".to_string(),
565        "c".to_string(),
566        "b".to_string(),
567        "a".to_string(),
568    ]
569    .into_iter()
570    .collect();
571    assert_eq!(minmax.sortiness(), -1.0);
572    assert_eq!(minmax.sort_order(), SortOrder::Descending);
573
574    let minmax: MinMax<String> = vec![
575        "a".to_string(),
576        "b".to_string(),
577        "c".to_string(),
578        "a".to_string(),
579    ]
580    .into_iter()
581    .collect();
582    assert_eq!(minmax.sortiness(), 0.3333333333333333);
583    assert_eq!(minmax.sort_order(), SortOrder::Unsorted);
584}
585
586#[cfg(test)]
587mod test_nan_inf {
588    use super::MinMax;
589
590    #[test]
591    fn test_minmax_nan() {
592        let mut minmax = MinMax::new();
593        minmax.add(1.0f64);
594        minmax.add(f64::NAN);
595        minmax.add(3.0f64);
596        // NaN is unordered for PartialOrd, so it should not update min/max
597        // and adding it should not panic.
598        assert_eq!(minmax.min(), Some(&1.0f64));
599        assert_eq!(minmax.max(), Some(&3.0f64));
600    }
601
602    #[test]
603    fn test_minmax_infinity() {
604        let mut minmax = MinMax::new();
605        minmax.add(1.0f64);
606        minmax.add(f64::INFINITY);
607        minmax.add(f64::NEG_INFINITY);
608        assert_eq!(minmax.min(), Some(&f64::NEG_INFINITY));
609        assert_eq!(minmax.max(), Some(&f64::INFINITY));
610    }
611
612    #[test]
613    fn test_minmax_only_infinities() {
614        let mut minmax = MinMax::new();
615        minmax.add(f64::INFINITY);
616        minmax.add(f64::NEG_INFINITY);
617        assert_eq!(minmax.min(), Some(&f64::NEG_INFINITY));
618        assert_eq!(minmax.max(), Some(&f64::INFINITY));
619        assert_eq!(minmax.len(), 2);
620    }
621
622    #[test]
623    fn test_sortiness_with_infinity() {
624        let minmax: MinMax<f64> = vec![1.0, 2.0, f64::INFINITY].into_iter().collect();
625        assert_eq!(minmax.sortiness(), 1.0); // ascending including infinity
626    }
627}