stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12/// Compute the exact median on a stream of data.
13///
14/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
15pub fn median<I>(it: I) -> Option<f64>
16where
17    I: Iterator,
18    <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20    it.collect::<Unsorted<_>>().median()
21}
22
23/// Compute the median absolute deviation (MAD) on a stream of data.
24pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26    I: Iterator,
27    <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29    it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
33///
34/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
35pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37    I: Iterator,
38    <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40    it.collect::<Unsorted<_>>().quartiles()
41}
42
43/// Compute the exact mode on a stream of data.
44///
45/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
46///
47/// If the data does not have a mode, then `None` is returned.
48pub fn mode<T, I>(it: I) -> Option<T>
49where
50    T: PartialOrd + Clone,
51    I: Iterator<Item = T>,
52{
53    it.collect::<Unsorted<T>>().mode()
54}
55
56/// Compute the modes on a stream of data.
57///
58/// If there is a single mode, then only that value is returned in the `Vec`
59/// however, if there are multiple values tied for occurring the most amount of times
60/// those values are returned.
61///
62/// ## Example
63/// ```
64/// use stats;
65///
66/// let vals = vec![1, 1, 2, 2, 3];
67///
68/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
69/// ```
70/// This has time complexity `O(n)`
71///
72/// If the data does not have a mode, then an empty `Vec` is returned.
73pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75    T: PartialOrd + Clone,
76    I: Iterator<Item = T>,
77{
78    it.collect::<Unsorted<T>>().modes()
79}
80
81/// Compute the antimodes on a stream of data.
82///
83/// Antimode is the least frequent non-zero score.
84///
85/// If there is a single antimode, then only that value is returned in the `Vec`
86/// however, if there are multiple values tied for occurring the least amount of times
87/// those values are returned.
88///
89/// Only the first 10 antimodes are returned to prevent returning the whole set
90/// when cardinality = number of records (i.e. all unique values).
91///
92/// ## Example
93/// ```
94/// use stats;
95///
96/// let vals = vec![1, 1, 2, 2, 3];
97///
98/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
99/// ```
100/// This has time complexity `O(n)`
101///
102/// If the data does not have an antimode, then an empty `Vec` is returned.
103pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105    T: PartialOrd + Clone,
106    I: Iterator<Item = T>,
107{
108    let (antimodes_result, antimodes_count, antimodes_occurrences) =
109        it.collect::<Unsorted<T>>().antimodes();
110    (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113/// Compute the Gini Coefficient on a stream of data.
114///
115/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
116/// to 1 (perfect inequality).
117///
118/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
119pub fn gini<I>(it: I) -> Option<f64>
120where
121    I: Iterator,
122    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
123{
124    it.collect::<Unsorted<_>>().gini()
125}
126
127/// Compute the kurtosis (excess kurtosis) on a stream of data.
128///
129/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
130/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
131/// negative values indicate light tails.
132///
133/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
134pub fn kurtosis<I>(it: I) -> Option<f64>
135where
136    I: Iterator,
137    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
138{
139    it.collect::<Unsorted<_>>().kurtosis()
140}
141
142/// Compute the percentile rank of a value on a stream of data.
143///
144/// Returns the percentile rank (0-100) of the given value in the distribution.
145/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
146///
147/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
148pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
149where
150    I: Iterator,
151    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
152    V: PartialOrd + ToPrimitive,
153{
154    it.collect::<Unsorted<_>>().percentile_rank(value)
155}
156
157/// Compute the Atkinson Index on a stream of data.
158///
159/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
160/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
161/// Higher ε values give more weight to inequality at the lower end of the distribution.
162///
163/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
164pub fn atkinson<I>(it: I, epsilon: f64) -> Option<f64>
165where
166    I: Iterator,
167    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
168{
169    it.collect::<Unsorted<_>>().atkinson(epsilon)
170}
171
172fn median_on_sorted<T>(data: &[T]) -> Option<f64>
173where
174    T: PartialOrd + ToPrimitive,
175{
176    Some(match data.len() {
177        // Empty slice case - return None early
178        0 => return None,
179        // Single element case - return that element converted to f64
180        1 => data.first()?.to_f64()?,
181        // Even length case - average the two middle elements
182        len if len.is_multiple_of(2) => {
183            let idx = len / 2;
184            // Safety: we know these indices are valid because we checked len is even and non-zero,
185            // so idx-1 and idx are valid indices into data
186            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
187            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
188            f64::midpoint(v1, v2)
189        }
190        // Odd length case - return the middle element
191        // Safety: we know the index is within bounds
192        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
193    })
194}
195
196fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
197where
198    T: Sync + PartialOrd + ToPrimitive,
199{
200    if data.is_empty() {
201        return None;
202    }
203    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
204
205    // Use adaptive parallel processing based on data size
206    let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
207        // Sequential processing for small datasets
208        let mut vec = Vec::with_capacity(data.len());
209        for x in data {
210            vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
211        }
212        vec
213    } else {
214        // Parallel processing for large datasets
215        data.par_iter()
216            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
217            .collect()
218    };
219
220    // Use adaptive sorting based on size
221    if abs_diff_vec.len() < PARALLEL_THRESHOLD {
222        abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
223    } else {
224        abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
225    }
226    median_on_sorted(&abs_diff_vec)
227}
228
229fn gini_on_sorted<T>(data: &[Partial<T>]) -> Option<f64>
230where
231    T: Sync + PartialOrd + ToPrimitive,
232{
233    let len = data.len();
234
235    // Early return for empty data
236    if len == 0 {
237        return None;
238    }
239
240    // Single element case: perfect equality, Gini = 0
241    if len == 1 {
242        return Some(0.0);
243    }
244
245    // Use adaptive parallel processing based on data size
246    let (sum, weighted_sum) = if len < PARALLEL_THRESHOLD {
247        // Sequential processing for small datasets
248        let mut sum = 0.0;
249        let mut weighted_sum = 0.0;
250
251        for (i, x) in data.iter().enumerate() {
252            let val = x.0.to_f64()?;
253            sum += val;
254            weighted_sum += (i + 1) as f64 * val;
255        }
256
257        (sum, weighted_sum)
258    } else {
259        // Parallel processing for large datasets
260        // Convert to f64 and compute sum in parallel
261        let sum: f64 = data
262            .par_iter()
263            .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
264            .sum();
265
266        // Compute weighted sum in parallel using enumerate to get indices
267        let weighted_sum: f64 = data
268            .par_iter()
269            .enumerate()
270            .map(|(i, x)| {
271                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
272                (i + 1) as f64 * val
273            })
274            .sum();
275
276        (sum, weighted_sum)
277    };
278
279    // If sum is zero, Gini is undefined
280    if sum == 0.0 {
281        return None;
282    }
283
284    // Compute Gini coefficient using the formula:
285    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
286    // where i is 1-indexed rank and y_i are sorted values
287    let n = len as f64;
288    let gini = (2.0 * weighted_sum) / (n * sum) - (n + 1.0) / n;
289
290    Some(gini)
291}
292
293fn kurtosis_on_sorted<T>(data: &[Partial<T>]) -> Option<f64>
294where
295    T: Sync + PartialOrd + ToPrimitive,
296{
297    let len = data.len();
298
299    // Need at least 4 elements for meaningful kurtosis
300    if len < 4 {
301        return None;
302    }
303
304    // Compute mean first
305    let sum: f64 = if len < PARALLEL_THRESHOLD {
306        let mut sum = 0.0;
307        for x in data {
308            sum += x.0.to_f64()?;
309        }
310        sum
311    } else {
312        data.par_iter()
313            .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
314            .sum()
315    };
316
317    let mean = sum / len as f64;
318
319    // Compute variance and sum of fourth powers
320    let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
321        let mut variance_sum = 0.0;
322        let mut fourth_power_sum = 0.0;
323
324        for x in data {
325            let val = x.0.to_f64()?;
326            let diff = val - mean;
327            let diff_sq = diff * diff;
328            variance_sum += diff_sq;
329            fourth_power_sum += diff_sq * diff_sq;
330        }
331
332        (variance_sum, fourth_power_sum)
333    } else {
334        let variance_sum: f64 = data
335            .par_iter()
336            .map(|x| {
337                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
338                let diff = val - mean;
339                diff * diff
340            })
341            .sum();
342
343        let fourth_power_sum: f64 = data
344            .par_iter()
345            .map(|x| {
346                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
347                let diff = val - mean;
348                let diff_sq = diff * diff;
349                diff_sq * diff_sq
350            })
351            .sum();
352
353        (variance_sum, fourth_power_sum)
354    };
355
356    let variance = variance_sum / len as f64;
357
358    // If variance is zero, all values are the same, kurtosis is undefined
359    if variance == 0.0 {
360        return None;
361    }
362
363    let n = len as f64;
364    let variance_sq = variance * variance;
365
366    // Sample excess kurtosis formula:
367    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
368    let kurtosis = (n * (n + 1.0) * fourth_power_sum)
369        / ((n - 1.0) * (n - 2.0) * (n - 3.0) * variance_sq)
370        - 3.0 * (n - 1.0) * (n - 1.0) / ((n - 2.0) * (n - 3.0));
371
372    Some(kurtosis)
373}
374
375fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
376where
377    T: PartialOrd + ToPrimitive,
378    V: PartialOrd + ToPrimitive,
379{
380    let len = data.len();
381
382    if len == 0 {
383        return None;
384    }
385
386    let value_f64 = value.to_f64()?;
387
388    // Binary search to find the position where value would be inserted
389    // This gives us the number of values <= value
390    let count_leq = data.binary_search_by(|x| {
391        x.0.to_f64()
392            .unwrap_or(f64::NAN)
393            .partial_cmp(&value_f64)
394            .unwrap_or(std::cmp::Ordering::Less)
395    });
396
397    let count = match count_leq {
398        Ok(idx) => {
399            // Value found at idx, but we need to count all equal values
400            let mut count = idx + 1;
401            // Count all equal values after idx
402            for x in data.iter().skip(idx + 1) {
403                if let Some(x_val) = x.0.to_f64() {
404                    if (x_val - value_f64).abs() < 1e-9 {
405                        count += 1;
406                    } else {
407                        break;
408                    }
409                } else {
410                    break;
411                }
412            }
413            count
414        }
415        Err(idx) => idx, // Number of values less than value
416    };
417
418    // Percentile rank = (count / n) * 100
419    Some((count as f64 / len as f64) * 100.0)
420}
421
422fn atkinson_on_sorted<T>(data: &[Partial<T>], epsilon: f64) -> Option<f64>
423where
424    T: Sync + PartialOrd + ToPrimitive,
425{
426    let len = data.len();
427
428    // Early return for empty data
429    if len == 0 {
430        return None;
431    }
432
433    // Single element case: perfect equality, Atkinson = 0
434    if len == 1 {
435        return Some(0.0);
436    }
437
438    // Epsilon must be non-negative
439    if epsilon < 0.0 {
440        return None;
441    }
442
443    // Compute mean
444    let sum: f64 = if len < PARALLEL_THRESHOLD {
445        let mut sum = 0.0;
446        for x in data {
447            sum += x.0.to_f64()?;
448        }
449        sum
450    } else {
451        data.par_iter()
452            .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
453            .sum()
454    };
455
456    let mean = sum / len as f64;
457
458    // If mean is zero, Atkinson is undefined
459    if mean == 0.0 {
460        return None;
461    }
462
463    // Handle special case: epsilon = 1 (uses geometric mean)
464    if (epsilon - 1.0).abs() < 1e-10 {
465        // A_1 = 1 - (geometric_mean / mean)
466        let geometric_sum: f64 = if len < PARALLEL_THRESHOLD {
467            let mut sum = 0.0;
468            for x in data {
469                let val = x.0.to_f64()?;
470                if val <= 0.0 {
471                    // Geometric mean undefined for non-positive values
472                    return None;
473                }
474                sum += val.ln();
475            }
476            sum
477        } else {
478            data.par_iter()
479                .map(|x| {
480                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
481                    if val <= 0.0 {
482                        return f64::NAN;
483                    }
484                    val.ln()
485                })
486                .sum()
487        };
488
489        if geometric_sum.is_nan() {
490            return None;
491        }
492
493        let geometric_mean = (geometric_sum / len as f64).exp();
494        return Some(1.0 - geometric_mean / mean);
495    }
496
497    // General case: epsilon != 1
498    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
499    let exponent = 1.0 - epsilon;
500
501    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
502        let mut sum = 0.0;
503        for x in data {
504            let val = x.0.to_f64()?;
505            if val < 0.0 {
506                // Negative values with non-integer exponent are undefined
507                return None;
508            }
509            let ratio = val / mean;
510            sum += ratio.powf(exponent);
511        }
512        sum
513    } else {
514        data.par_iter()
515            .map(|x| {
516                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
517                if val < 0.0 {
518                    return f64::NAN;
519                }
520                let ratio = val / mean;
521                ratio.powf(exponent)
522            })
523            .sum()
524    };
525
526    if sum_powered.is_nan() || sum_powered <= 0.0 {
527        return None;
528    }
529
530    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
531    Some(atkinson)
532}
533
534/// Selection algorithm to find the k-th smallest element in O(n) average time.
535/// This is an implementation of quickselect algorithm.
536fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
537where
538    T: PartialOrd,
539{
540    if data.is_empty() || k >= data.len() {
541        return None;
542    }
543
544    let mut left = 0;
545    let mut right = data.len() - 1;
546
547    loop {
548        if left == right {
549            return Some(&data[left].0);
550        }
551
552        // Use median-of-three pivot selection for better performance
553        let pivot_idx = median_of_three_pivot(data, left, right);
554        let pivot_idx = partition(data, left, right, pivot_idx);
555
556        match k.cmp(&pivot_idx) {
557            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
558            std::cmp::Ordering::Less => right = pivot_idx - 1,
559            std::cmp::Ordering::Greater => left = pivot_idx + 1,
560        }
561    }
562}
563
564/// Zero-copy selection algorithm that works with indices instead of copying data.
565/// This avoids the overhead of cloning data elements.
566fn quickselect_by_index<'a, T>(
567    data: &'a [Partial<T>],
568    indices: &mut [usize],
569    k: usize,
570) -> Option<&'a T>
571where
572    T: PartialOrd,
573{
574    if data.is_empty() || indices.is_empty() || k >= indices.len() {
575        return None;
576    }
577
578    let mut left = 0;
579    let mut right = indices.len() - 1;
580
581    loop {
582        if left == right {
583            return Some(&data[indices[left]].0);
584        }
585
586        // Use median-of-three pivot selection for better performance
587        let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
588        let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
589
590        match k.cmp(&pivot_idx) {
591            std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
592            std::cmp::Ordering::Less => right = pivot_idx - 1,
593            std::cmp::Ordering::Greater => left = pivot_idx + 1,
594        }
595    }
596}
597
598/// Select the median of three elements as pivot for better quickselect performance
599fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
600where
601    T: PartialOrd,
602{
603    let mid = left + (right - left) / 2;
604
605    if data[left] <= data[mid] {
606        if data[mid] <= data[right] {
607            mid
608        } else if data[left] <= data[right] {
609            right
610        } else {
611            left
612        }
613    } else if data[left] <= data[right] {
614        left
615    } else if data[mid] <= data[right] {
616        right
617    } else {
618        mid
619    }
620}
621
622/// Select the median of three elements as pivot using indices
623fn median_of_three_pivot_by_index<T>(
624    data: &[Partial<T>],
625    indices: &[usize],
626    left: usize,
627    right: usize,
628) -> usize
629where
630    T: PartialOrd,
631{
632    let mid = left + (right - left) / 2;
633
634    if data[indices[left]] <= data[indices[mid]] {
635        if data[indices[mid]] <= data[indices[right]] {
636            mid
637        } else if data[indices[left]] <= data[indices[right]] {
638            right
639        } else {
640            left
641        }
642    } else if data[indices[left]] <= data[indices[right]] {
643        left
644    } else if data[indices[mid]] <= data[indices[right]] {
645        right
646    } else {
647        mid
648    }
649}
650
651/// Partition function for quickselect
652fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
653where
654    T: PartialOrd,
655{
656    // Move pivot to end
657    data.swap(pivot_idx, right);
658    let mut store_idx = left;
659
660    // Move all elements smaller than pivot to the left
661    // Cache pivot position for better cache locality (access data[right] directly each time)
662    for i in left..right {
663        // Safety: i, store_idx, and right are guaranteed to be in bounds
664        // Compare directly with pivot at data[right] - compiler should optimize this access
665        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
666            data.swap(i, store_idx);
667            store_idx += 1;
668        }
669    }
670
671    // Move pivot to its final place
672    data.swap(store_idx, right);
673    store_idx
674}
675
676/// Partition function for quickselect using indices
677fn partition_by_index<T>(
678    data: &[Partial<T>],
679    indices: &mut [usize],
680    left: usize,
681    right: usize,
682    pivot_idx: usize,
683) -> usize
684where
685    T: PartialOrd,
686{
687    // Move pivot to end
688    indices.swap(pivot_idx, right);
689    let mut store_idx = left;
690
691    // Cache pivot index and value for better cache locality
692    // This reduces indirection: indices[right] -> data[indices[right]]
693    // Safety: right is guaranteed to be in bounds
694    let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
695    let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
696
697    // Move all elements smaller than pivot to the left
698    let mut elem_idx: usize;
699    for i in left..right {
700        // Safety: i and store_idx are guaranteed to be in bounds
701        elem_idx = unsafe { *indices.get_unchecked(i) };
702        if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
703            indices.swap(i, store_idx);
704            store_idx += 1;
705        }
706    }
707
708    // Move pivot to its final place
709    indices.swap(store_idx, right);
710    store_idx
711}
712
713// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
714// It divides data into quarters based on the length n = 4k + r where r is remainder.
715// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
716fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
717where
718    T: PartialOrd + ToPrimitive,
719{
720    let len = data.len();
721
722    // Early return for small arrays
723    match len {
724        0..=2 => return None,
725        3 => {
726            return Some(
727                // SAFETY: We know these indices are valid because len == 3
728                unsafe {
729                    (
730                        data.get_unchecked(0).0.to_f64()?,
731                        data.get_unchecked(1).0.to_f64()?,
732                        data.get_unchecked(2).0.to_f64()?,
733                    )
734                },
735            );
736        }
737        _ => {}
738    }
739
740    // Calculate k and remainder in one division
741    let k = len / 4;
742    let remainder = len % 4;
743
744    // SAFETY: All index calculations below are guaranteed to be in bounds
745    // because we've verified len >= 4 above and k is len/4
746    unsafe {
747        Some(match remainder {
748            0 => {
749                // Let data = {x_i}_{i=0..4k} where k is positive integer.
750                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
751                // If we divide data into two parts {x_i < q2} as L and
752                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
753                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
754                // =============
755                // Simply put: Length is multiple of 4 (4k)
756                // q1 = (x_{k-1} + x_k) / 2
757                // q2 = (x_{2k-1} + x_{2k}) / 2
758                // q3 = (x_{3k-1} + x_{3k}) / 2
759                let q1 = f64::midpoint(
760                    data.get_unchecked(k - 1).0.to_f64()?,
761                    data.get_unchecked(k).0.to_f64()?,
762                );
763                let q2 = f64::midpoint(
764                    data.get_unchecked(2 * k - 1).0.to_f64()?,
765                    data.get_unchecked(2 * k).0.to_f64()?,
766                );
767                let q3 = f64::midpoint(
768                    data.get_unchecked(3 * k - 1).0.to_f64()?,
769                    data.get_unchecked(3 * k).0.to_f64()?,
770                );
771                (q1, q2, q3)
772            }
773            1 => {
774                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
775                // Median q2 = x_{2k}.
776                // If we divide data other than q2 into two parts {x_i < q2}
777                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
778                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
779                // =============
780                // Simply put: Length is 4k + 1
781                // q1 = (x_{k-1} + x_k) / 2
782                // q2 = x_{2k}
783                // q3 = (x_{3k} + x_{3k+1}) / 2
784                let q1 = f64::midpoint(
785                    data.get_unchecked(k - 1).0.to_f64()?,
786                    data.get_unchecked(k).0.to_f64()?,
787                );
788                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
789                let q3 = f64::midpoint(
790                    data.get_unchecked(3 * k).0.to_f64()?,
791                    data.get_unchecked(3 * k + 1).0.to_f64()?,
792                );
793                (q1, q2, q3)
794            }
795            2 => {
796                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
797                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
798                // If we divide data into two parts {x_i < q2} as L and
799                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
800                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
801                // =============
802                // Simply put: Length is 4k + 2
803                // q1 = x_k
804                // q2 = (x_{2k} + x_{2k+1}) / 2
805                // q3 = x_{3k+1}
806                let q1 = data.get_unchecked(k).0.to_f64()?;
807                let q2 = f64::midpoint(
808                    data.get_unchecked(2 * k).0.to_f64()?,
809                    data.get_unchecked(2 * k + 1).0.to_f64()?,
810                );
811                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
812                (q1, q2, q3)
813            }
814            _ => {
815                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
816                // Median q2 = x_{2k+1}.
817                // If we divide data other than q2 into two parts {x_i < q2}
818                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
819                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
820                // =============
821                // Simply put: Length is 4k + 3
822                // q1 = x_k
823                // q2 = x_{2k+1}
824                // q3 = x_{3k+2}
825                let q1 = data.get_unchecked(k).0.to_f64()?;
826                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
827                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
828                (q1, q2, q3)
829            }
830        })
831    }
832}
833
834/// Compute quartiles using selection algorithm in O(n) time instead of O(n log n) sorting.
835/// This implementation follows Method 3 from `<https://en.wikipedia.org/wiki/Quartile>`
836fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
837where
838    T: PartialOrd + ToPrimitive,
839{
840    let len = data.len();
841
842    // Early return for small arrays
843    match len {
844        0..=2 => return None,
845        3 => {
846            // For 3 elements, we need to find the sorted order using selection
847            let min_val = quickselect(data, 0)?.to_f64()?;
848            let med_val = quickselect(data, 1)?.to_f64()?;
849            let max_val = quickselect(data, 2)?.to_f64()?;
850            return Some((min_val, med_val, max_val));
851        }
852        _ => {}
853    }
854
855    // Calculate k and remainder in one division
856    let k = len / 4;
857    let remainder = len % 4;
858
859    // Use selection algorithm to find the required order statistics
860    match remainder {
861        0 => {
862            // Length is multiple of 4 (4k)
863            // Q1 = (x_{k-1} + x_k) / 2
864            // Q2 = (x_{2k-1} + x_{2k}) / 2
865            // Q3 = (x_{3k-1} + x_{3k}) / 2
866            let q1_low = quickselect(data, k - 1)?.to_f64()?;
867            let q1_high = quickselect(data, k)?.to_f64()?;
868            let q1 = f64::midpoint(q1_low, q1_high);
869
870            let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
871            let q2_high = quickselect(data, 2 * k)?.to_f64()?;
872            let q2 = f64::midpoint(q2_low, q2_high);
873
874            let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
875            let q3_high = quickselect(data, 3 * k)?.to_f64()?;
876            let q3 = f64::midpoint(q3_low, q3_high);
877
878            Some((q1, q2, q3))
879        }
880        1 => {
881            // Length is 4k + 1
882            // Q1 = (x_{k-1} + x_k) / 2
883            // Q2 = x_{2k}
884            // Q3 = (x_{3k} + x_{3k+1}) / 2
885            let q1_low = quickselect(data, k - 1)?.to_f64()?;
886            let q1_high = quickselect(data, k)?.to_f64()?;
887            let q1 = f64::midpoint(q1_low, q1_high);
888
889            let q2 = quickselect(data, 2 * k)?.to_f64()?;
890
891            let q3_low = quickselect(data, 3 * k)?.to_f64()?;
892            let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
893            let q3 = f64::midpoint(q3_low, q3_high);
894
895            Some((q1, q2, q3))
896        }
897        2 => {
898            // Length is 4k + 2
899            // Q1 = x_k
900            // Q2 = (x_{2k} + x_{2k+1}) / 2
901            // Q3 = x_{3k+1}
902            let q1 = quickselect(data, k)?.to_f64()?;
903
904            let q2_low = quickselect(data, 2 * k)?.to_f64()?;
905            let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
906            let q2 = f64::midpoint(q2_low, q2_high);
907
908            let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
909
910            Some((q1, q2, q3))
911        }
912        _ => {
913            // Length is 4k + 3
914            // Q1 = x_k
915            // Q2 = x_{2k+1}
916            // Q3 = x_{3k+2}
917            let q1 = quickselect(data, k)?.to_f64()?;
918            let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
919            let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
920
921            Some((q1, q2, q3))
922        }
923    }
924}
925
926/// Zero-copy quartiles computation using index-based selection.
927/// This avoids copying data by working with an array of indices.
928fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
929where
930    T: PartialOrd + ToPrimitive,
931{
932    let len = data.len();
933
934    // Early return for small arrays
935    match len {
936        0..=2 => return None,
937        3 => {
938            // For 3 elements, create indices and find sorted order
939            let mut indices = vec![0, 1, 2];
940            let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
941            let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
942            let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
943            return Some((min_val, med_val, max_val));
944        }
945        _ => {}
946    }
947
948    // Calculate k and remainder in one division
949    let k = len / 4;
950    let remainder = len % 4;
951
952    // Allocate indices array once and reuse it by resetting after each call
953    let indices_template: Vec<usize> = (0..len).collect();
954    #[allow(unused)]
955    let mut indices: Vec<usize> = Vec::with_capacity(len);
956
957    // Use zero-copy selection algorithm to find the required order statistics
958    // Note: Each quickselect_by_index call mutates the indices array, so we need
959    // to reset it for each call to ensure correctness.
960    match remainder {
961        0 => {
962            // Length is multiple of 4 (4k)
963            indices.clone_from(&indices_template);
964            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
965            indices.clone_from(&indices_template);
966            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
967            let q1 = f64::midpoint(q1_low, q1_high);
968
969            indices.clone_from(&indices_template);
970            let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
971            indices.clone_from(&indices_template);
972            let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
973            let q2 = f64::midpoint(q2_low, q2_high);
974
975            indices.clone_from(&indices_template);
976            let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
977            indices.clone_from(&indices_template);
978            let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
979            let q3 = f64::midpoint(q3_low, q3_high);
980
981            Some((q1, q2, q3))
982        }
983        1 => {
984            // Length is 4k + 1
985            indices.clone_from(&indices_template);
986            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
987            indices.clone_from(&indices_template);
988            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
989            let q1 = f64::midpoint(q1_low, q1_high);
990
991            indices.clone_from(&indices_template);
992            let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
993
994            indices.clone_from(&indices_template);
995            let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
996            indices.clone_from(&indices_template);
997            let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
998            let q3 = f64::midpoint(q3_low, q3_high);
999
1000            Some((q1, q2, q3))
1001        }
1002        2 => {
1003            // Length is 4k + 2
1004            indices.clone_from(&indices_template);
1005            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1006
1007            indices.clone_from(&indices_template);
1008            let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1009            indices.clone_from(&indices_template);
1010            let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1011            let q2 = f64::midpoint(q2_low, q2_high);
1012
1013            indices.clone_from(&indices_template);
1014            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1015
1016            Some((q1, q2, q3))
1017        }
1018        _ => {
1019            // Length is 4k + 3
1020            indices.clone_from(&indices_template);
1021            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1022            indices.clone_from(&indices_template);
1023            let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1024            indices.clone_from(&indices_template);
1025            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
1026
1027            Some((q1, q2, q3))
1028        }
1029    }
1030}
1031
1032fn mode_on_sorted<T, I>(it: I) -> Option<T>
1033where
1034    T: PartialOrd,
1035    I: Iterator<Item = T>,
1036{
1037    use std::cmp::Ordering;
1038
1039    // This approach to computing the mode works very nicely when the
1040    // number of samples is large and is close to its cardinality.
1041    // In other cases, a hashmap would be much better.
1042    // But really, how can we know this when given an arbitrary stream?
1043    // Might just switch to a hashmap to track frequencies. That would also
1044    // be generally useful for discovering the cardinality of a sample.
1045    let (mut mode, mut next) = (None, None);
1046    let (mut mode_count, mut next_count) = (0usize, 0usize);
1047    for x in it {
1048        if mode.as_ref() == Some(&x) {
1049            mode_count += 1;
1050        } else if next.as_ref() == Some(&x) {
1051            next_count += 1;
1052        } else {
1053            next = Some(x);
1054            next_count = 0;
1055        }
1056
1057        match next_count.cmp(&mode_count) {
1058            Ordering::Greater => {
1059                mode = next;
1060                mode_count = next_count;
1061                next = None;
1062                next_count = 0;
1063            }
1064            Ordering::Equal => {
1065                mode = None;
1066                mode_count = 0;
1067            }
1068            Ordering::Less => {}
1069        }
1070    }
1071    mode
1072}
1073
1074/// Computes both modes and antimodes from a sorted slice of values.
1075/// This version works with references to avoid unnecessary cloning.
1076///
1077/// # Arguments
1078///
1079/// * `data` - A sorted slice of values
1080///
1081/// # Notes
1082///
1083/// - Mode is the most frequently occurring value(s)
1084/// - Antimode is the least frequently occurring value(s)
1085/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1086/// - For empty slices, returns empty vectors and zero counts
1087/// - For single value slices, returns that value as the mode and empty antimode
1088/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1089///
1090/// # Returns
1091///
1092/// A tuple containing:
1093/// * Modes information: `(Vec<T>, usize, u32)` where:
1094///   - Vec<T>: Vector containing the mode values
1095///   - usize: Number of modes found
1096///   - u32: Frequency/count of the mode values
1097/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1098///   - Vec<T>: Vector containing up to 10 antimode values
1099///   - usize: Total number of antimodes
1100///   - u32: Frequency/count of the antimode values
1101#[allow(clippy::type_complexity)]
1102#[inline]
1103fn modes_and_antimodes_on_sorted_slice<T>(
1104    data: &[Partial<T>],
1105) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1106where
1107    T: PartialOrd + Clone,
1108{
1109    let size = data.len();
1110
1111    // Early return for empty slice
1112    if size == 0 {
1113        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1114    }
1115
1116    // Estimate capacity using integer square root of size
1117    // Integer square root using binary search (faster than floating point sqrt)
1118    let sqrt_size = if size == 0 {
1119        0
1120    } else {
1121        let mut x = size;
1122        let mut y = x.div_ceil(2);
1123        while y < x {
1124            x = y;
1125            y = usize::midpoint(x, size / x);
1126        }
1127        x
1128    };
1129    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1130
1131    let mut current_value = &data[0].0;
1132    let mut current_count = 1;
1133    let mut highest_count = 1;
1134    let mut lowest_count = u32::MAX;
1135
1136    // Count consecutive runs - optimized to reduce allocations
1137    for x in data.iter().skip(1) {
1138        if x.0 == *current_value {
1139            current_count += 1;
1140            highest_count = highest_count.max(current_count);
1141        } else {
1142            runs.push((current_value, current_count));
1143            lowest_count = lowest_count.min(current_count);
1144            current_value = &x.0;
1145            current_count = 1;
1146        }
1147    }
1148    runs.push((current_value, current_count));
1149    lowest_count = lowest_count.min(current_count);
1150
1151    // Early return if only one unique value
1152    if runs.len() == 1 {
1153        let (val, count) = runs.pop().unwrap();
1154        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1155    }
1156
1157    // Special case: if all values appear exactly once
1158    if highest_count == 1 {
1159        let antimodes_count = runs.len().min(10);
1160        let total_count = runs.len();
1161        let mut antimodes = Vec::with_capacity(antimodes_count);
1162        for (val, _) in runs.into_iter().take(antimodes_count) {
1163            antimodes.push(val.clone());
1164        }
1165        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1166        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1167    }
1168
1169    // Estimate capacities based on the number of runs
1170    // For modes: typically 1-3 modes, rarely more than 10% of runs
1171    // For antimodes: we only collect up to 10, but need to count all
1172    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1173    let estimated_antimodes = 10.min(runs.len());
1174
1175    // Collect indices first to avoid unnecessary cloning
1176    let mut modes_indices = Vec::with_capacity(estimated_modes);
1177    let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
1178    let mut mode_count = 0;
1179    let mut antimodes_count = 0;
1180    let mut antimodes_collected = 0_u32;
1181
1182    // Count and collect mode/antimode indices simultaneously
1183    for (idx, (_, count)) in runs.iter().enumerate() {
1184        if *count == highest_count {
1185            modes_indices.push(idx);
1186            mode_count += 1;
1187        }
1188        if *count == lowest_count {
1189            antimodes_count += 1;
1190            if antimodes_collected < 10 {
1191                antimodes_indices.push(idx);
1192                antimodes_collected += 1;
1193            }
1194        }
1195    }
1196
1197    // Extract values only for the indices we need, cloning only at the end
1198    let modes_result: Vec<T> = modes_indices
1199        .into_iter()
1200        .map(|idx| runs[idx].0.clone())
1201        .collect();
1202    let antimodes_result: Vec<T> = antimodes_indices
1203        .into_iter()
1204        .map(|idx| runs[idx].0.clone())
1205        .collect();
1206
1207    (
1208        (modes_result, mode_count, highest_count),
1209        (antimodes_result, antimodes_count, lowest_count),
1210    )
1211}
1212
1213/// A commutative data structure for lazily sorted sequences of data.
1214///
1215/// The sort does not occur until statistics need to be computed.
1216///
1217/// Note that this works on types that do not define a total ordering like
1218/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1219/// is returned.
1220#[allow(clippy::unsafe_derive_deserialize)]
1221#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
1222pub struct Unsorted<T> {
1223    sorted: bool,
1224    data: Vec<Partial<T>>,
1225}
1226
1227impl<T: PartialOrd> Unsorted<T> {
1228    /// Create initial empty state.
1229    #[inline]
1230    #[must_use]
1231    pub fn new() -> Unsorted<T> {
1232        Default::default()
1233    }
1234
1235    /// Add a new element to the set.
1236    #[allow(clippy::inline_always)]
1237    #[inline(always)]
1238    pub fn add(&mut self, v: T) {
1239        self.sorted = false;
1240        self.data.push(Partial(v));
1241    }
1242
1243    /// Return the number of data points.
1244    #[inline]
1245    #[must_use]
1246    pub const fn len(&self) -> usize {
1247        self.data.len()
1248    }
1249
1250    #[inline]
1251    #[must_use]
1252    pub const fn is_empty(&self) -> bool {
1253        self.data.is_empty()
1254    }
1255
1256    #[inline]
1257    fn sort(&mut self) {
1258        if !self.sorted {
1259            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1260            if self.data.len() < PARALLEL_THRESHOLD {
1261                self.data.sort_unstable();
1262            } else {
1263                self.data.par_sort_unstable();
1264            }
1265            self.sorted = true;
1266        }
1267    }
1268
1269    #[inline]
1270    const fn already_sorted(&mut self) {
1271        self.sorted = true;
1272    }
1273
1274    /// Add multiple elements efficiently
1275    #[inline]
1276    pub fn add_bulk(&mut self, values: Vec<T>) {
1277        self.sorted = false;
1278        self.data.reserve(values.len());
1279        self.data.extend(values.into_iter().map(Partial));
1280    }
1281
1282    /// Shrink capacity to fit current data
1283    #[inline]
1284    pub fn shrink_to_fit(&mut self) {
1285        self.data.shrink_to_fit();
1286    }
1287
1288    /// Create with specific capacity
1289    #[inline]
1290    #[must_use]
1291    pub fn with_capacity(capacity: usize) -> Self {
1292        Unsorted {
1293            sorted: true,
1294            data: Vec::with_capacity(capacity),
1295        }
1296    }
1297
1298    /// Add a value assuming it's greater than all existing values
1299    #[inline]
1300    pub fn push_ascending(&mut self, value: T) {
1301        if let Some(last) = self.data.last() {
1302            debug_assert!(last.0 <= value, "Value must be >= than last element");
1303        }
1304        self.data.push(Partial(value));
1305        // Data remains sorted
1306    }
1307}
1308
1309impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
1310    #[inline]
1311    /// Returns the cardinality of the data.
1312    /// Set `sorted` to `true` if the data is already sorted.
1313    /// Set `parallel_threshold` to `0` to force sequential processing.
1314    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1315    /// Set `parallel_threshold` to `2` to force parallel processing.
1316    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1317    /// greater than the default threshold of `10_000`.
1318    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1319        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1320        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1321
1322        let len = self.data.len();
1323        match len {
1324            0 => return 0,
1325            1 => return 1,
1326            _ => {}
1327        }
1328
1329        if sorted {
1330            self.already_sorted();
1331        } else {
1332            self.sort();
1333        }
1334
1335        let use_parallel = parallel_threshold != 0
1336            && (parallel_threshold == 1
1337                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1338
1339        if use_parallel {
1340            // Parallel processing using chunks
1341            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1342            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1343            let chunk_info: ChunkInfo<'_, T> = self
1344                .data
1345                .par_chunks(CHUNK_SIZE)
1346                .map(|chunk| {
1347                    // Count unique elements within this chunk
1348                    let mut count = u64::from(!chunk.is_empty());
1349                    for window in chunk.windows(2) {
1350                        // safety: windows(2) guarantees window has length 2
1351                        if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1352                            count += 1;
1353                        }
1354                    }
1355                    (count, chunk.first(), chunk.last())
1356                })
1357                .collect();
1358
1359            // Combine results, checking boundaries between chunks
1360            let mut total = 0;
1361            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1362                total += count;
1363
1364                // Check boundary with previous chunk
1365                if i > 0
1366                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1367                    && prev_last == curr_first
1368                {
1369                    total -= 1; // Deduct 1 if boundary values are equal
1370                }
1371            }
1372
1373            total
1374        } else {
1375            // Sequential processing
1376
1377            // the statement below is equivalent to:
1378            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1379            let mut count = u64::from(!self.data.is_empty());
1380
1381            for window in self.data.windows(2) {
1382                // safety: windows(2) guarantees window has length 2
1383                if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1384                    count += 1;
1385                }
1386            }
1387            count
1388        }
1389    }
1390}
1391
1392impl<T: PartialOrd + Clone> Unsorted<T> {
1393    /// Returns the mode of the data.
1394    #[inline]
1395    pub fn mode(&mut self) -> Option<T> {
1396        if self.data.is_empty() {
1397            return None;
1398        }
1399        self.sort();
1400        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1401    }
1402
1403    /// Returns the modes of the data.
1404    /// Note that there is also a `frequency::mode()` function that return one mode
1405    /// with the highest frequency. If there is a tie, it returns None.
1406    #[inline]
1407    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1408        if self.data.is_empty() {
1409            return (Vec::new(), 0, 0);
1410        }
1411        self.sort();
1412        modes_and_antimodes_on_sorted_slice(&self.data).0
1413    }
1414
1415    /// Returns the antimodes of the data.
1416    /// `antimodes_result` only returns the first 10 antimodes
1417    #[inline]
1418    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1419        if self.data.is_empty() {
1420            return (Vec::new(), 0, 0);
1421        }
1422        self.sort();
1423        modes_and_antimodes_on_sorted_slice(&self.data).1
1424    }
1425
1426    /// Returns the modes and antimodes of the data.
1427    /// `antimodes_result` only returns the first 10 antimodes
1428    #[allow(clippy::type_complexity)]
1429    #[inline]
1430    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1431        if self.data.is_empty() {
1432            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1433        }
1434        self.sort();
1435        modes_and_antimodes_on_sorted_slice(&self.data)
1436    }
1437}
1438
1439impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1440    /// Returns the median of the data.
1441    #[inline]
1442    pub fn median(&mut self) -> Option<f64> {
1443        if self.data.is_empty() {
1444            return None;
1445        }
1446        self.sort();
1447        median_on_sorted(&self.data)
1448    }
1449}
1450
1451impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1452    /// Returns the Median Absolute Deviation (MAD) of the data.
1453    #[inline]
1454    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1455        if self.data.is_empty() {
1456            return None;
1457        }
1458        if existing_median.is_none() {
1459            self.sort();
1460        }
1461        mad_on_sorted(&self.data, existing_median)
1462    }
1463}
1464
1465impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1466    /// Returns the quartiles of the data using the traditional sorting approach.
1467    ///
1468    /// This method sorts the data first and then computes quartiles.
1469    /// Time complexity: O(n log n)
1470    #[inline]
1471    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1472        if self.data.is_empty() {
1473            return None;
1474        }
1475        self.sort();
1476        quartiles_on_sorted(&self.data)
1477    }
1478}
1479
1480impl<T: PartialOrd + ToPrimitive + Sync> Unsorted<T> {
1481    /// Returns the Gini Coefficient of the data.
1482    ///
1483    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1484    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1485    /// Time complexity: O(n log n)
1486    #[inline]
1487    pub fn gini(&mut self) -> Option<f64> {
1488        if self.data.is_empty() {
1489            return None;
1490        }
1491        self.sort();
1492        gini_on_sorted(&self.data)
1493    }
1494
1495    /// Returns the kurtosis (excess kurtosis) of the data.
1496    ///
1497    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1498    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1499    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1500    /// Time complexity: O(n log n)
1501    #[inline]
1502    pub fn kurtosis(&mut self) -> Option<f64> {
1503        if self.data.is_empty() {
1504            return None;
1505        }
1506        self.sort();
1507        kurtosis_on_sorted(&self.data)
1508    }
1509
1510    /// Returns the percentile rank of a value in the data.
1511    ///
1512    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1513    /// values, returns 0.0. If greater than all, returns 100.0.
1514    /// This method sorts the data first and then computes the percentile rank.
1515    /// Time complexity: O(n log n)
1516    #[inline]
1517    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1518    where
1519        V: PartialOrd + ToPrimitive,
1520    {
1521        if self.data.is_empty() {
1522            return None;
1523        }
1524        self.sort();
1525        percentile_rank_on_sorted(&self.data, &value)
1526    }
1527
1528    /// Returns the Atkinson Index of the data.
1529    ///
1530    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1531    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1532    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1533    /// This method sorts the data first and then computes the Atkinson index.
1534    /// Time complexity: O(n log n)
1535    ///
1536    /// # Arguments
1537    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1538    ///   - 0.0: No inequality aversion (returns 0)
1539    ///   - 0.5: Moderate aversion
1540    ///   - 1.0: Uses geometric mean (special case)
1541    ///   - 2.0: High aversion
1542    #[inline]
1543    pub fn atkinson(&mut self, epsilon: f64) -> Option<f64> {
1544        if self.data.is_empty() {
1545            return None;
1546        }
1547        self.sort();
1548        atkinson_on_sorted(&self.data, epsilon)
1549    }
1550}
1551
1552impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1553    /// Returns the quartiles of the data using selection algorithm.
1554    ///
1555    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1556    /// in O(n) average time complexity instead of O(n log n) sorting.
1557    /// Requires T to implement Clone to create a working copy of the data.
1558    ///
1559    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1560    /// is often slower than the sorting-based approach for small to medium datasets due to:
1561    /// - Need to find multiple order statistics (3 separate quickselect calls)
1562    /// - Overhead of copying data to avoid mutation
1563    /// - Rayon's highly optimized parallel sorting
1564    #[inline]
1565    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1566        if self.data.is_empty() {
1567            return None;
1568        }
1569        // If data is already sorted, use zero-copy approach to avoid cloning
1570        if self.sorted {
1571            return quartiles_with_zero_copy_selection(&self.data);
1572        }
1573        // Create a copy using collect to avoid mutating the original for selection algorithm
1574        let mut data_copy: Vec<Partial<T>> =
1575            self.data.iter().map(|x| Partial(x.0.clone())).collect();
1576        quartiles_with_selection(&mut data_copy)
1577    }
1578}
1579
1580impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1581    /// Returns the quartiles using zero-copy selection algorithm.
1582    ///
1583    /// This implementation avoids copying data by working with indices instead,
1584    /// providing better performance than the clone-based selection approach.
1585    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1586    #[inline]
1587    #[must_use]
1588    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1589        if self.data.is_empty() {
1590            return None;
1591        }
1592        quartiles_with_zero_copy_selection(&self.data)
1593    }
1594}
1595
1596impl<T: PartialOrd> Commute for Unsorted<T> {
1597    #[inline]
1598    fn merge(&mut self, mut v: Unsorted<T>) {
1599        if v.is_empty() {
1600            return;
1601        }
1602
1603        self.sorted = false;
1604        // we use std::mem::take to avoid unnecessary allocations
1605        self.data.extend(std::mem::take(&mut v.data));
1606    }
1607}
1608
1609impl<T: PartialOrd> Default for Unsorted<T> {
1610    #[inline]
1611    fn default() -> Unsorted<T> {
1612        Unsorted {
1613            data: Vec::with_capacity(10_000),
1614            sorted: true, // empty is sorted
1615        }
1616    }
1617}
1618
1619impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1620    #[inline]
1621    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1622        let mut v = Unsorted::new();
1623        v.extend(it);
1624        v
1625    }
1626}
1627
1628impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1629    #[inline]
1630    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1631        self.sorted = false;
1632        self.data.extend(it.into_iter().map(Partial));
1633    }
1634}
1635
1636fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1637where
1638    T: PartialOrd + Clone,
1639{
1640    let len = data.len();
1641
1642    // Early return for empty array or invalid percentiles
1643    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1644        return None;
1645    }
1646
1647    // Optimize: Check if percentiles are already sorted and unique
1648    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1649        // Single or empty percentile - no need to sort/dedup
1650        percentiles.to_vec()
1651    } else {
1652        // Check if already sorted and unique (common case)
1653        let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1654
1655        if is_sorted_unique {
1656            // Already sorted and unique, use directly without cloning
1657            percentiles.to_vec()
1658        } else {
1659            // Need to sort and dedup - use HashSet for efficient deduplication
1660            use std::collections::HashSet;
1661            let mut seen = HashSet::with_capacity(percentiles.len().min(100));
1662            let mut sorted_unique = Vec::with_capacity(percentiles.len());
1663            for &p in percentiles {
1664                if seen.insert(p) {
1665                    sorted_unique.push(p);
1666                }
1667            }
1668            sorted_unique.sort_unstable();
1669            sorted_unique
1670        }
1671    };
1672
1673    let mut results = Vec::with_capacity(unique_percentiles.len());
1674
1675    // SAFETY: All index calculations below are guaranteed to be in bounds
1676    // because we've verified len > 0 and the rank calculation ensures
1677    // the index is within bounds
1678    unsafe {
1679        for &p in &unique_percentiles {
1680            // Calculate the ordinal rank using nearest-rank method
1681            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1682            // n = ⌈(P/100) × N⌉
1683            #[allow(clippy::cast_sign_loss)]
1684            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1685
1686            // Convert to 0-based index
1687            let idx = rank.saturating_sub(1);
1688
1689            // Get the value at that rank and extract the inner value
1690            results.push(data.get_unchecked(idx).0.clone());
1691        }
1692    }
1693
1694    Some(results)
1695}
1696
1697impl<T: PartialOrd + Clone> Unsorted<T> {
1698    /// Returns the requested percentiles of the data.
1699    ///
1700    /// Uses the nearest-rank method to compute percentiles.
1701    /// Each returned value is an actual value from the dataset.
1702    ///
1703    /// # Arguments
1704    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1705    ///
1706    /// # Returns
1707    /// * `None` if the data is empty or if any percentile is > 100
1708    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1709    ///
1710    /// # Example
1711    /// ```
1712    /// use stats::Unsorted;
1713    /// let mut data = Unsorted::new();
1714    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1715    /// let percentiles = vec![25, 50, 75];
1716    /// let results = data.custom_percentiles(&percentiles).unwrap();
1717    /// assert_eq!(results, vec![3, 5, 8]);
1718    /// ```
1719    #[inline]
1720    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1721        if self.data.is_empty() {
1722            return None;
1723        }
1724        self.sort();
1725        custom_percentiles_on_sorted(&self.data, percentiles)
1726    }
1727}
1728
1729#[cfg(test)]
1730mod test {
1731    use super::*;
1732
1733    #[test]
1734    fn test_cardinality_empty() {
1735        let mut unsorted: Unsorted<i32> = Unsorted::new();
1736        assert_eq!(unsorted.cardinality(false, 1), 0);
1737    }
1738
1739    #[test]
1740    fn test_cardinality_single_element() {
1741        let mut unsorted = Unsorted::new();
1742        unsorted.add(5);
1743        assert_eq!(unsorted.cardinality(false, 1), 1);
1744    }
1745
1746    #[test]
1747    fn test_cardinality_unique_elements() {
1748        let mut unsorted = Unsorted::new();
1749        unsorted.extend(vec![1, 2, 3, 4, 5]);
1750        assert_eq!(unsorted.cardinality(false, 1), 5);
1751    }
1752
1753    #[test]
1754    fn test_cardinality_duplicate_elements() {
1755        let mut unsorted = Unsorted::new();
1756        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1757        assert_eq!(unsorted.cardinality(false, 1), 4);
1758    }
1759
1760    #[test]
1761    fn test_cardinality_all_same() {
1762        let mut unsorted = Unsorted::new();
1763        unsorted.extend(vec![1; 100]);
1764        assert_eq!(unsorted.cardinality(false, 1), 1);
1765    }
1766
1767    #[test]
1768    fn test_cardinality_large_range() {
1769        let mut unsorted = Unsorted::new();
1770        unsorted.extend(0..1_000_000);
1771        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1772    }
1773
1774    #[test]
1775    fn test_cardinality_large_range_sequential() {
1776        let mut unsorted = Unsorted::new();
1777        unsorted.extend(0..1_000_000);
1778        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1779    }
1780
1781    #[test]
1782    fn test_cardinality_presorted() {
1783        let mut unsorted = Unsorted::new();
1784        unsorted.extend(vec![1, 2, 3, 4, 5]);
1785        unsorted.sort();
1786        assert_eq!(unsorted.cardinality(true, 1), 5);
1787    }
1788
1789    #[test]
1790    fn test_cardinality_float() {
1791        let mut unsorted = Unsorted::new();
1792        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1793        assert_eq!(unsorted.cardinality(false, 1), 4);
1794    }
1795
1796    #[test]
1797    fn test_cardinality_string() {
1798        let mut unsorted = Unsorted::new();
1799        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1800        assert_eq!(unsorted.cardinality(false, 1), 3);
1801    }
1802
1803    #[test]
1804    fn test_quartiles_selection_vs_sorted() {
1805        // Test that selection-based quartiles gives same results as sorting-based
1806        let test_cases = vec![
1807            vec![3, 5, 7, 9],
1808            vec![3, 5, 7],
1809            vec![1, 2, 7, 11],
1810            vec![3, 5, 7, 9, 12],
1811            vec![2, 2, 3, 8, 10],
1812            vec![3, 5, 7, 9, 12, 20],
1813            vec![0, 2, 4, 8, 10, 11],
1814            vec![3, 5, 7, 9, 12, 20, 21],
1815            vec![1, 5, 6, 6, 7, 10, 19],
1816        ];
1817
1818        for test_case in test_cases {
1819            let mut unsorted1 = Unsorted::new();
1820            let mut unsorted2 = Unsorted::new();
1821            let mut unsorted3 = Unsorted::new();
1822            unsorted1.extend(test_case.clone());
1823            unsorted2.extend(test_case.clone());
1824            unsorted3.extend(test_case.clone());
1825
1826            let result_sorted = unsorted1.quartiles();
1827            let result_selection = unsorted2.quartiles_with_selection();
1828            let result_zero_copy = unsorted3.quartiles_zero_copy();
1829
1830            assert_eq!(
1831                result_sorted, result_selection,
1832                "Selection mismatch for test case: {:?}",
1833                test_case
1834            );
1835            assert_eq!(
1836                result_sorted, result_zero_copy,
1837                "Zero-copy mismatch for test case: {:?}",
1838                test_case
1839            );
1840        }
1841    }
1842
1843    #[test]
1844    fn test_quartiles_with_selection_small() {
1845        // Test edge cases for selection-based quartiles
1846        let mut unsorted: Unsorted<i32> = Unsorted::new();
1847        assert_eq!(unsorted.quartiles_with_selection(), None);
1848
1849        let mut unsorted = Unsorted::new();
1850        unsorted.extend(vec![1, 2]);
1851        assert_eq!(unsorted.quartiles_with_selection(), None);
1852
1853        let mut unsorted = Unsorted::new();
1854        unsorted.extend(vec![1, 2, 3]);
1855        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1856    }
1857
1858    #[test]
1859    fn test_quickselect() {
1860        let data = vec![
1861            Partial(3),
1862            Partial(1),
1863            Partial(4),
1864            Partial(1),
1865            Partial(5),
1866            Partial(9),
1867            Partial(2),
1868            Partial(6),
1869        ];
1870
1871        // Test finding different positions
1872        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1873        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1874        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1875
1876        // Test edge cases
1877        let mut empty: Vec<Partial<i32>> = vec![];
1878        assert_eq!(quickselect(&mut empty, 0), None);
1879
1880        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1881        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1882    }
1883
1884    #[test]
1885    fn median_stream() {
1886        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1887        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1888    }
1889
1890    #[test]
1891    fn mad_stream() {
1892        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1893        assert_eq!(
1894            mad(
1895                vec![
1896                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1897                    50, 61, 39, 88, 5, 13, 64
1898                ]
1899                .into_iter(),
1900                None
1901            ),
1902            Some(16.0)
1903        );
1904    }
1905
1906    #[test]
1907    fn mad_stream_precalc_median() {
1908        let data = vec![3usize, 5, 7, 9].into_iter();
1909        let median1 = median(data.clone());
1910        assert_eq!(mad(data, median1), Some(2.0));
1911
1912        let data2 = vec![
1913            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1914            39, 88, 5, 13, 64,
1915        ]
1916        .into_iter();
1917        let median2 = median(data2.clone());
1918        assert_eq!(mad(data2, median2), Some(16.0));
1919    }
1920
1921    #[test]
1922    fn mode_stream() {
1923        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1924        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1925        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1926        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1927        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1928    }
1929
1930    #[test]
1931    fn median_floats() {
1932        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1933        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1934    }
1935
1936    #[test]
1937    fn mode_floats() {
1938        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1939        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1940        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1941        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1942        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1943    }
1944
1945    #[test]
1946    fn modes_stream() {
1947        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1948        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1949        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1950        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1951        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1952        let vec: Vec<u32> = vec![];
1953        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1954    }
1955
1956    #[test]
1957    fn modes_floats() {
1958        assert_eq!(
1959            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1960            (vec![], 0, 0)
1961        );
1962        assert_eq!(
1963            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1964            (vec![3.0], 1, 4)
1965        );
1966        assert_eq!(
1967            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1968            (vec![3.0, 4.0], 2, 2)
1969        );
1970        assert_eq!(
1971            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1972            (vec![1.0, 3.0], 2, 2)
1973        );
1974    }
1975
1976    #[test]
1977    fn antimodes_stream() {
1978        assert_eq!(
1979            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1980            (vec![3, 5, 7, 9], 4, 1)
1981        );
1982        assert_eq!(
1983            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1984            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1985        );
1986        assert_eq!(
1987            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1988            (vec![1], 1, 1)
1989        );
1990        assert_eq!(
1991            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1992            (vec![3, 4], 2, 2)
1993        );
1994        assert_eq!(
1995            antimodes(
1996                vec![
1997                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1998                    14, 14, 15, 15
1999                ]
2000                .into_iter()
2001            ),
2002            // we only show the first 10 of the 13 antimodes
2003            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2004        );
2005        assert_eq!(
2006            antimodes(
2007                vec![
2008                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2009                    14, 14, 15, 15
2010                ]
2011                .into_iter()
2012            ),
2013            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2014        );
2015        assert_eq!(
2016            antimodes(vec![3usize, 3, 3, 4].into_iter()),
2017            (vec![4], 1, 1)
2018        );
2019        assert_eq!(
2020            antimodes(vec![4usize, 3, 3, 3].into_iter()),
2021            (vec![4], 1, 1)
2022        );
2023        assert_eq!(
2024            antimodes(vec![1usize, 1, 2, 2].into_iter()),
2025            (vec![1, 2], 2, 2)
2026        );
2027        let vec: Vec<u32> = vec![];
2028        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2029    }
2030
2031    #[test]
2032    fn antimodes_floats() {
2033        assert_eq!(
2034            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2035            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2036        );
2037        assert_eq!(
2038            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2039            (vec![], 0, 0)
2040        );
2041        assert_eq!(
2042            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2043            (vec![3.0, 4.0], 2, 2)
2044        );
2045        assert_eq!(
2046            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2047            (vec![2.0], 1, 1)
2048        );
2049    }
2050
2051    #[test]
2052    fn test_custom_percentiles() {
2053        // Test with integers
2054        let mut unsorted: Unsorted<i32> = Unsorted::new();
2055        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2056
2057        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2058        assert_eq!(result, vec![3, 6, 9]);
2059
2060        // Test with strings
2061        let mut str_data = Unsorted::new();
2062        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2063        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2064        assert_eq!(result, vec!["a", "b", "c", "d"]);
2065
2066        // Test with chars
2067        let mut char_data = Unsorted::new();
2068        char_data.extend('a'..='e');
2069        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2070        assert_eq!(result, vec!['b', 'c', 'd']);
2071
2072        // Test with floats
2073        let mut float_data = Unsorted::new();
2074        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2075        let result = float_data
2076            .custom_percentiles(&[10, 30, 50, 70, 90])
2077            .unwrap();
2078        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2079
2080        // Test with empty percentiles array
2081        let result = float_data.custom_percentiles(&[]).unwrap();
2082        assert_eq!(result, Vec::<f64>::new());
2083
2084        // Test with duplicate percentiles
2085        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2086        assert_eq!(result, vec![5.5]);
2087
2088        // Test with extreme percentiles
2089        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2090        assert_eq!(result, vec![1.1, 9.9]);
2091
2092        // Test with unsorted percentiles
2093        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2094        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2095
2096        // Test with single element
2097        let mut single = Unsorted::new();
2098        single.add(42);
2099        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2100        assert_eq!(result, vec![42, 42, 42]);
2101    }
2102
2103    #[test]
2104    fn quartiles_stream() {
2105        assert_eq!(
2106            quartiles(vec![3usize, 5, 7].into_iter()),
2107            Some((3., 5., 7.))
2108        );
2109        assert_eq!(
2110            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2111            Some((4., 6., 8.))
2112        );
2113        assert_eq!(
2114            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2115            Some((1.5, 4.5, 9.))
2116        );
2117        assert_eq!(
2118            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2119            Some((4., 7., 10.5))
2120        );
2121        assert_eq!(
2122            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2123            Some((2., 3., 9.))
2124        );
2125        assert_eq!(
2126            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2127            Some((5., 8., 12.))
2128        );
2129        assert_eq!(
2130            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2131            Some((2., 6., 10.))
2132        );
2133        assert_eq!(
2134            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2135            Some((5., 9., 20.))
2136        );
2137        assert_eq!(
2138            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2139            Some((5., 6., 10.))
2140        );
2141    }
2142
2143    #[test]
2144    fn quartiles_floats() {
2145        assert_eq!(
2146            quartiles(vec![3_f64, 5., 7.].into_iter()),
2147            Some((3., 5., 7.))
2148        );
2149        assert_eq!(
2150            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2151            Some((4., 6., 8.))
2152        );
2153        assert_eq!(
2154            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2155            Some((4., 7., 10.5))
2156        );
2157        assert_eq!(
2158            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2159            Some((5., 8., 12.))
2160        );
2161        assert_eq!(
2162            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2163            Some((5., 9., 20.))
2164        );
2165    }
2166
2167    #[test]
2168    fn test_quartiles_zero_copy_small() {
2169        // Test edge cases for zero-copy quartiles
2170        let unsorted: Unsorted<i32> = Unsorted::new();
2171        assert_eq!(unsorted.quartiles_zero_copy(), None);
2172
2173        let mut unsorted = Unsorted::new();
2174        unsorted.extend(vec![1, 2]);
2175        assert_eq!(unsorted.quartiles_zero_copy(), None);
2176
2177        let mut unsorted = Unsorted::new();
2178        unsorted.extend(vec![1, 2, 3]);
2179        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2180
2181        // Test larger case
2182        let mut unsorted = Unsorted::new();
2183        unsorted.extend(vec![3, 5, 7, 9]);
2184        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2185    }
2186
2187    #[test]
2188    fn gini_empty() {
2189        let mut unsorted: Unsorted<i32> = Unsorted::new();
2190        assert_eq!(unsorted.gini(), None);
2191        let empty_vec: Vec<i32> = vec![];
2192        assert_eq!(gini(empty_vec.into_iter()), None);
2193    }
2194
2195    #[test]
2196    fn gini_single_element() {
2197        let mut unsorted = Unsorted::new();
2198        unsorted.add(5);
2199        assert_eq!(unsorted.gini(), Some(0.0));
2200        assert_eq!(gini(vec![5].into_iter()), Some(0.0));
2201    }
2202
2203    #[test]
2204    fn gini_perfect_equality() {
2205        // All values are the same - perfect equality, Gini = 0
2206        let mut unsorted = Unsorted::new();
2207        unsorted.extend(vec![10, 10, 10, 10, 10]);
2208        let result = unsorted.gini().unwrap();
2209        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2210
2211        assert!((gini(vec![10, 10, 10, 10, 10].into_iter()).unwrap() - 0.0).abs() < 1e-10);
2212    }
2213
2214    #[test]
2215    fn gini_perfect_inequality() {
2216        // One value has everything, others have zero - perfect inequality
2217        // For [0, 0, 0, 0, 100], Gini should be close to 1
2218        let mut unsorted = Unsorted::new();
2219        unsorted.extend(vec![0, 0, 0, 0, 100]);
2220        let result = unsorted.gini().unwrap();
2221        // Perfect inequality should give Gini close to 1
2222        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2223        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2224    }
2225
2226    #[test]
2227    fn gini_stream() {
2228        // Test with known values
2229        // For [1, 2, 3, 4, 5]:
2230        // sum = 15
2231        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2232        // n = 5
2233        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2234        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter()).unwrap();
2235        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2236        assert!(
2237            (result - expected).abs() < 1e-10,
2238            "Expected {}, got {}",
2239            expected,
2240            result
2241        );
2242    }
2243
2244    #[test]
2245    fn gini_floats() {
2246        let mut unsorted = Unsorted::new();
2247        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2248        let result = unsorted.gini().unwrap();
2249        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2250        assert!((result - expected).abs() < 1e-10);
2251
2252        assert!(
2253            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter()).unwrap() - expected).abs() < 1e-10
2254        );
2255    }
2256
2257    #[test]
2258    fn gini_all_zeros() {
2259        // All zeros - sum is zero, Gini is undefined
2260        let mut unsorted = Unsorted::new();
2261        unsorted.extend(vec![0, 0, 0, 0]);
2262        assert_eq!(unsorted.gini(), None);
2263        assert_eq!(gini(vec![0, 0, 0, 0].into_iter()), None);
2264    }
2265
2266    #[test]
2267    fn gini_negative_values() {
2268        // Test with negative values (mathematically valid)
2269        let mut unsorted = Unsorted::new();
2270        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2271        let result = unsorted.gini();
2272        // Sum is 0, so Gini is undefined
2273        assert_eq!(result, None);
2274
2275        // Test with negative values that don't sum to zero
2276        let mut unsorted = Unsorted::new();
2277        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2278        let result = unsorted.gini();
2279        // Sum is 0, so Gini is undefined
2280        assert_eq!(result, None);
2281
2282        // Test with values that sum to non-zero
2283        let mut unsorted = Unsorted::new();
2284        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2285        let result = unsorted.gini();
2286        assert!(result.is_some());
2287    }
2288
2289    #[test]
2290    fn gini_known_cases() {
2291        // Test case: [1, 1, 1, 1, 1] - perfect equality
2292        let mut unsorted = Unsorted::new();
2293        unsorted.extend(vec![1, 1, 1, 1, 1]);
2294        let result = unsorted.gini().unwrap();
2295        assert!((result - 0.0).abs() < 1e-10);
2296
2297        // Test case: [0, 0, 0, 0, 1] - high inequality
2298        let mut unsorted = Unsorted::new();
2299        unsorted.extend(vec![0, 0, 0, 0, 1]);
2300        let result = unsorted.gini().unwrap();
2301        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2302        assert!((result - 0.8).abs() < 1e-10);
2303
2304        // Test case: [1, 2, 3] - moderate inequality
2305        let mut unsorted = Unsorted::new();
2306        unsorted.extend(vec![1, 2, 3]);
2307        let result = unsorted.gini().unwrap();
2308        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2309        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2310        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2311        assert!((result - expected).abs() < 1e-10);
2312    }
2313
2314    #[test]
2315    fn kurtosis_empty() {
2316        let mut unsorted: Unsorted<i32> = Unsorted::new();
2317        assert_eq!(unsorted.kurtosis(), None);
2318        let empty_vec: Vec<i32> = vec![];
2319        assert_eq!(kurtosis(empty_vec.into_iter()), None);
2320    }
2321
2322    #[test]
2323    fn kurtosis_small() {
2324        // Need at least 4 elements
2325        let mut unsorted = Unsorted::new();
2326        unsorted.extend(vec![1, 2]);
2327        assert_eq!(unsorted.kurtosis(), None);
2328
2329        let mut unsorted = Unsorted::new();
2330        unsorted.extend(vec![1, 2, 3]);
2331        assert_eq!(unsorted.kurtosis(), None);
2332    }
2333
2334    #[test]
2335    fn kurtosis_normal_distribution() {
2336        // Normal distribution should have kurtosis close to 0
2337        let mut unsorted = Unsorted::new();
2338        unsorted.extend(vec![1, 2, 3, 4, 5]);
2339        let result = unsorted.kurtosis();
2340        assert!(result.is_some());
2341        // For small samples, kurtosis can vary significantly
2342    }
2343
2344    #[test]
2345    fn kurtosis_all_same() {
2346        // All same values - variance is 0, kurtosis undefined
2347        let mut unsorted = Unsorted::new();
2348        unsorted.extend(vec![5, 5, 5, 5]);
2349        assert_eq!(unsorted.kurtosis(), None);
2350    }
2351
2352    #[test]
2353    fn kurtosis_stream() {
2354        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter());
2355        assert!(result.is_some());
2356    }
2357
2358    #[test]
2359    fn percentile_rank_empty() {
2360        let mut unsorted: Unsorted<i32> = Unsorted::new();
2361        assert_eq!(unsorted.percentile_rank(5), None);
2362        let empty_vec: Vec<i32> = vec![];
2363        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2364    }
2365
2366    #[test]
2367    fn percentile_rank_basic() {
2368        let mut unsorted = Unsorted::new();
2369        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2370
2371        // Value less than all
2372        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2373
2374        // Value greater than all
2375        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2376
2377        // Median (5) should be around 50th percentile
2378        let rank = unsorted.percentile_rank(5).unwrap();
2379        assert!((rank - 50.0).abs() < 1.0);
2380
2381        // First value should be at 10th percentile
2382        let rank = unsorted.percentile_rank(1).unwrap();
2383        assert!((rank - 10.0).abs() < 1.0);
2384    }
2385
2386    #[test]
2387    fn percentile_rank_duplicates() {
2388        let mut unsorted = Unsorted::new();
2389        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2390
2391        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2392        let rank = unsorted.percentile_rank(2).unwrap();
2393        assert!((rank - 40.0).abs() < 1.0);
2394    }
2395
2396    #[test]
2397    fn percentile_rank_stream() {
2398        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2399        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2400    }
2401
2402    #[test]
2403    fn atkinson_empty() {
2404        let mut unsorted: Unsorted<i32> = Unsorted::new();
2405        assert_eq!(unsorted.atkinson(1.0), None);
2406        let empty_vec: Vec<i32> = vec![];
2407        assert_eq!(atkinson(empty_vec.into_iter(), 1.0), None);
2408    }
2409
2410    #[test]
2411    fn atkinson_single_element() {
2412        let mut unsorted = Unsorted::new();
2413        unsorted.add(5);
2414        assert_eq!(unsorted.atkinson(1.0), Some(0.0));
2415        assert_eq!(atkinson(vec![5].into_iter(), 1.0), Some(0.0));
2416    }
2417
2418    #[test]
2419    fn atkinson_perfect_equality() {
2420        // All values the same - perfect equality, Atkinson = 0
2421        let mut unsorted = Unsorted::new();
2422        unsorted.extend(vec![10, 10, 10, 10, 10]);
2423        let result = unsorted.atkinson(1.0).unwrap();
2424        assert!((result - 0.0).abs() < 1e-10);
2425    }
2426
2427    #[test]
2428    fn atkinson_epsilon_zero() {
2429        // Epsilon = 0 means no inequality aversion, should return 0
2430        let mut unsorted = Unsorted::new();
2431        unsorted.extend(vec![1, 2, 3, 4, 5]);
2432        let result = unsorted.atkinson(0.0).unwrap();
2433        assert!((result - 0.0).abs() < 1e-10);
2434    }
2435
2436    #[test]
2437    fn atkinson_epsilon_one() {
2438        // Epsilon = 1 uses geometric mean
2439        let mut unsorted = Unsorted::new();
2440        unsorted.extend(vec![1, 2, 3, 4, 5]);
2441        let result = unsorted.atkinson(1.0);
2442        assert!(result.is_some());
2443    }
2444
2445    #[test]
2446    fn atkinson_negative_epsilon() {
2447        let mut unsorted = Unsorted::new();
2448        unsorted.extend(vec![1, 2, 3, 4, 5]);
2449        assert_eq!(unsorted.atkinson(-1.0), None);
2450    }
2451
2452    #[test]
2453    fn atkinson_zero_mean() {
2454        // If mean is zero, Atkinson is undefined
2455        let mut unsorted = Unsorted::new();
2456        unsorted.extend(vec![0, 0, 0, 0]);
2457        assert_eq!(unsorted.atkinson(1.0), None);
2458    }
2459
2460    #[test]
2461    fn atkinson_stream() {
2462        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0);
2463        assert!(result.is_some());
2464    }
2465}
2466
2467#[cfg(test)]
2468mod bench {
2469    use super::*;
2470    use std::time::Instant;
2471
2472    #[test]
2473    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
2474    fn comprehensive_quartiles_benchmark() {
2475        // Test a wide range of data sizes
2476        let data_sizes = vec![
2477            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2478        ];
2479
2480        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2481
2482        for size in data_sizes {
2483            println!("--- Testing with {} elements ---", size);
2484
2485            // Test different data patterns
2486            let test_patterns = vec![
2487                ("Random", generate_random_data(size)),
2488                ("Reverse Sorted", {
2489                    let mut v = Vec::with_capacity(size);
2490                    for x in (0..size).rev() {
2491                        v.push(x as i32);
2492                    }
2493                    v
2494                }),
2495                ("Already Sorted", {
2496                    let mut v = Vec::with_capacity(size);
2497                    for x in 0..size {
2498                        v.push(x as i32);
2499                    }
2500                    v
2501                }),
2502                ("Many Duplicates", {
2503                    // Create a vector with just a few distinct values repeated many times
2504                    let mut v = Vec::with_capacity(size);
2505                    let chunk_size = size / 100;
2506                    for i in 0..100 {
2507                        v.extend(std::iter::repeat_n(i, chunk_size));
2508                    }
2509                    // Add any remaining elements
2510                    v.extend(std::iter::repeat_n(0, size - v.len()));
2511                    v
2512                }),
2513            ];
2514
2515            for (pattern_name, test_data) in test_patterns {
2516                println!("\n  Pattern: {}", pattern_name);
2517
2518                // Benchmark sorting-based approach
2519                let mut unsorted1 = Unsorted::new();
2520                unsorted1.extend(test_data.clone());
2521
2522                let start = Instant::now();
2523                let result_sorted = unsorted1.quartiles();
2524                let sorted_time = start.elapsed();
2525
2526                // Benchmark selection-based approach (with copying)
2527                let mut unsorted2 = Unsorted::new();
2528                unsorted2.extend(test_data.clone());
2529
2530                let start = Instant::now();
2531                let result_selection = unsorted2.quartiles_with_selection();
2532                let selection_time = start.elapsed();
2533
2534                // Benchmark zero-copy selection-based approach
2535                let mut unsorted3 = Unsorted::new();
2536                unsorted3.extend(test_data);
2537
2538                let start = Instant::now();
2539                let result_zero_copy = unsorted3.quartiles_zero_copy();
2540                let zero_copy_time = start.elapsed();
2541
2542                // Verify results are the same
2543                assert_eq!(result_sorted, result_selection);
2544                assert_eq!(result_sorted, result_zero_copy);
2545
2546                let selection_speedup =
2547                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
2548                let zero_copy_speedup =
2549                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
2550
2551                println!("    Sorting:       {:>12?}", sorted_time);
2552                println!(
2553                    "    Selection:     {:>12?} (speedup: {:.2}x)",
2554                    selection_time, selection_speedup
2555                );
2556                println!(
2557                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
2558                    zero_copy_time, zero_copy_speedup
2559                );
2560
2561                let best_algorithm =
2562                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
2563                        "ZERO-COPY"
2564                    } else if selection_speedup > 1.0 {
2565                        "SELECTION"
2566                    } else {
2567                        "SORTING"
2568                    };
2569                println!("    Best: {}", best_algorithm);
2570            }
2571
2572            println!(); // Add blank line between sizes
2573        }
2574    }
2575
2576    // Generate random data for benchmarking
2577    fn generate_random_data(size: usize) -> Vec<i32> {
2578        // Simple LCG random number generator for reproducible results
2579        let mut rng = 1234567u64;
2580        let mut vec = Vec::with_capacity(size);
2581        for _ in 0..size {
2582            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
2583            vec.push((rng >> 16) as i32);
2584        }
2585        vec
2586    }
2587
2588    #[test]
2589    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
2590    fn find_selection_threshold() {
2591        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
2592
2593        // Binary search approach to find the threshold
2594        let mut found_threshold = None;
2595        let test_sizes = vec![
2596            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
2597            15_000_000, 20_000_000, 25_000_000, 30_000_000,
2598        ];
2599
2600        for size in test_sizes {
2601            println!("Testing size: {}", size);
2602
2603            // Use random data as it's most representative of real-world scenarios
2604            let test_data = generate_random_data(size);
2605
2606            // Run multiple iterations to get average performance
2607            let iterations = 3;
2608            let mut sorting_total = 0u128;
2609            let mut selection_total = 0u128;
2610            let mut zero_copy_total = 0u128;
2611
2612            for i in 0..iterations {
2613                println!("  Iteration {}/{}", i + 1, iterations);
2614
2615                // Sorting approach
2616                let mut unsorted1 = Unsorted::new();
2617                unsorted1.extend(test_data.clone());
2618
2619                let start = Instant::now();
2620                let _result_sorted = unsorted1.quartiles();
2621                sorting_total += start.elapsed().as_nanos();
2622
2623                // Selection approach (with copying)
2624                let mut unsorted2 = Unsorted::new();
2625                unsorted2.extend(test_data.clone());
2626
2627                let start = Instant::now();
2628                let _result_selection = unsorted2.quartiles_with_selection();
2629                selection_total += start.elapsed().as_nanos();
2630
2631                // Zero-copy selection approach
2632                let mut unsorted3 = Unsorted::new();
2633                unsorted3.extend(test_data.clone());
2634
2635                let start = Instant::now();
2636                let _result_zero_copy = unsorted3.quartiles_zero_copy();
2637                zero_copy_total += start.elapsed().as_nanos();
2638            }
2639
2640            let avg_sorting = sorting_total / iterations as u128;
2641            let avg_selection = selection_total / iterations as u128;
2642            let avg_zero_copy = zero_copy_total / iterations as u128;
2643            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
2644            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
2645
2646            println!(
2647                "  Average sorting:    {:>12.2}ms",
2648                avg_sorting as f64 / 1_000_000.0
2649            );
2650            println!(
2651                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
2652                avg_selection as f64 / 1_000_000.0,
2653                selection_speedup
2654            );
2655            println!(
2656                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
2657                avg_zero_copy as f64 / 1_000_000.0,
2658                zero_copy_speedup
2659            );
2660
2661            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
2662                found_threshold = Some(size);
2663                let best_method = if zero_copy_speedup > selection_speedup {
2664                    "Zero-copy"
2665                } else {
2666                    "Selection"
2667                };
2668                println!(
2669                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
2670                    best_method, size
2671                );
2672            }
2673
2674            println!();
2675        }
2676
2677        match found_threshold {
2678            Some(threshold) => println!(
2679                "🎯 Selection algorithm becomes faster at approximately {} elements",
2680                threshold
2681            ),
2682            None => println!("❌ Selection algorithm did not become faster in the tested range"),
2683        }
2684    }
2685
2686    #[test]
2687    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
2688    fn benchmark_different_data_types() {
2689        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
2690
2691        let size = 5_000_000; // Use a large size where differences might be visible
2692
2693        // Test with f64 (floating point)
2694        println!("Testing with f64 data:");
2695        let float_data: Vec<f64> = generate_random_data(size)
2696            .into_iter()
2697            .map(|x| x as f64 / 1000.0)
2698            .collect();
2699
2700        let mut unsorted1 = Unsorted::new();
2701        unsorted1.extend(float_data.clone());
2702        let start = Instant::now();
2703        let _result = unsorted1.quartiles();
2704        let sorting_time = start.elapsed();
2705
2706        let mut unsorted2 = Unsorted::new();
2707        unsorted2.extend(float_data.clone());
2708        let start = Instant::now();
2709        let _result = unsorted2.quartiles_with_selection();
2710        let selection_time = start.elapsed();
2711
2712        let mut unsorted3 = Unsorted::new();
2713        unsorted3.extend(float_data);
2714        let start = Instant::now();
2715        let _result = unsorted3.quartiles_zero_copy();
2716        let zero_copy_time = start.elapsed();
2717
2718        println!("  Sorting:    {:?}", sorting_time);
2719        println!("  Selection:  {:?}", selection_time);
2720        println!("  Zero-copy:  {:?}", zero_copy_time);
2721        println!(
2722            "  Selection Speedup:  {:.2}x",
2723            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2724        );
2725        println!(
2726            "  Zero-copy Speedup:  {:.2}x\n",
2727            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2728        );
2729
2730        // Test with i64 (larger integers)
2731        println!("Testing with i64 data:");
2732        let int64_data: Vec<i64> = generate_random_data(size)
2733            .into_iter()
2734            .map(|x| x as i64 * 1000)
2735            .collect();
2736
2737        let mut unsorted1 = Unsorted::new();
2738        unsorted1.extend(int64_data.clone());
2739        let start = Instant::now();
2740        let _result = unsorted1.quartiles();
2741        let sorting_time = start.elapsed();
2742
2743        let mut unsorted2 = Unsorted::new();
2744        unsorted2.extend(int64_data.clone());
2745        let start = Instant::now();
2746        let _result = unsorted2.quartiles_with_selection();
2747        let selection_time = start.elapsed();
2748
2749        let mut unsorted3 = Unsorted::new();
2750        unsorted3.extend(int64_data);
2751        let start = Instant::now();
2752        let _result = unsorted3.quartiles_zero_copy();
2753        let zero_copy_time = start.elapsed();
2754
2755        println!("  Sorting:    {:?}", sorting_time);
2756        println!("  Selection:  {:?}", selection_time);
2757        println!("  Zero-copy:  {:?}", zero_copy_time);
2758        println!(
2759            "  Selection Speedup:  {:.2}x",
2760            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2761        );
2762        println!(
2763            "  Zero-copy Speedup:  {:.2}x",
2764            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2765        );
2766    }
2767}
stats/unsorted.rs

stats/
unsorted.rs