stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10// PARALLEL_THRESHOLD (10,000) is the minimum dataset size for rayon parallel sort.
11// The separate 10,240 threshold in cardinality estimation (5 × 2,048) is aligned to
12// cache-line-friendly chunk sizes for parallel iterator reduction.
13const PARALLEL_THRESHOLD: usize = 10_000;
14
15/// Compute the exact median on a stream of data.
16///
17/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
18#[inline]
19pub fn median<I>(it: I) -> Option<f64>
20where
21    I: Iterator,
22    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
23{
24    it.collect::<Unsorted<_>>().median()
25}
26
27/// Compute the median absolute deviation (MAD) on a stream of data.
28#[inline]
29pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
30where
31    I: Iterator,
32    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
33{
34    it.collect::<Unsorted<_>>().mad(precalc_median)
35}
36
37/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
38///
39/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
40#[inline]
41pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
42where
43    I: Iterator,
44    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
45{
46    it.collect::<Unsorted<_>>().quartiles()
47}
48
49/// Compute the exact mode on a stream of data.
50///
51/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
52///
53/// If the data does not have a mode, then `None` is returned.
54#[inline]
55pub fn mode<T, I>(it: I) -> Option<T>
56where
57    T: PartialOrd + Clone + Send,
58    I: Iterator<Item = T>,
59{
60    it.collect::<Unsorted<T>>().mode()
61}
62
63/// Compute the modes on a stream of data.
64///
65/// If there is a single mode, then only that value is returned in the `Vec`
66/// however, if there are multiple values tied for occurring the most amount of times
67/// those values are returned.
68///
69/// ## Example
70/// ```
71/// use stats;
72///
73/// let vals = vec![1, 1, 2, 2, 3];
74///
75/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
76/// ```
77/// This has time complexity `O(n)`
78///
79/// If the data does not have a mode, then an empty `Vec` is returned.
80#[inline]
81pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
82where
83    T: PartialOrd + Clone + Send,
84    I: Iterator<Item = T>,
85{
86    it.collect::<Unsorted<T>>().modes()
87}
88
89/// Compute the antimodes on a stream of data.
90///
91/// Antimode is the least frequent non-zero score.
92///
93/// If there is a single antimode, then only that value is returned in the `Vec`
94/// however, if there are multiple values tied for occurring the least amount of times
95/// those values are returned.
96///
97/// Only the first 10 antimodes are returned to prevent returning the whole set
98/// when cardinality = number of records (i.e. all unique values).
99///
100/// ## Example
101/// ```
102/// use stats;
103///
104/// let vals = vec![1, 1, 2, 2, 3];
105///
106/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
107/// ```
108/// This has time complexity `O(n)`
109///
110/// If the data does not have an antimode, then an empty `Vec` is returned.
111#[inline]
112pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
113where
114    T: PartialOrd + Clone + Send,
115    I: Iterator<Item = T>,
116{
117    let (antimodes_result, antimodes_count, antimodes_occurrences) =
118        it.collect::<Unsorted<T>>().antimodes();
119    (antimodes_result, antimodes_count, antimodes_occurrences)
120}
121
122/// Compute the Gini Coefficient on a stream of data.
123///
124/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
125/// to 1 (perfect inequality).
126///
127/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
128#[inline]
129pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
130where
131    I: Iterator,
132    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
133{
134    it.collect::<Unsorted<_>>().gini(precalc_sum)
135}
136
137/// Compute the kurtosis (excess kurtosis) on a stream of data.
138///
139/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
140/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
141/// negative values indicate light tails.
142///
143/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
144#[inline]
145pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
146where
147    I: Iterator,
148    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
149{
150    it.collect::<Unsorted<_>>()
151        .kurtosis(precalc_mean, precalc_variance)
152}
153
154/// Compute the percentile rank of a value on a stream of data.
155///
156/// Returns the percentile rank (0-100) of the given value in the distribution.
157/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
158///
159/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
160#[inline]
161pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
162where
163    I: Iterator,
164    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
165    V: PartialOrd + ToPrimitive,
166{
167    it.collect::<Unsorted<_>>().percentile_rank(value)
168}
169
170/// Compute the Atkinson Index on a stream of data.
171///
172/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
173/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
174/// Higher ε values give more weight to inequality at the lower end of the distribution.
175///
176/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
177#[inline]
178pub fn atkinson<I>(
179    it: I,
180    epsilon: f64,
181    precalc_mean: Option<f64>,
182    precalc_geometric_sum: Option<f64>,
183) -> Option<f64>
184where
185    I: Iterator,
186    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
187{
188    it.collect::<Unsorted<_>>()
189        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
190}
191
192fn median_on_sorted<T>(data: &[T]) -> Option<f64>
193where
194    T: PartialOrd + ToPrimitive,
195{
196    Some(match data.len() {
197        // Empty slice case - return None early
198        0 => {
199            core::hint::cold_path();
200            return None;
201        }
202        // Single element case - return that element converted to f64
203        1 => data.first()?.to_f64()?,
204        // Even length case - average the two middle elements
205        len if len.is_multiple_of(2) => {
206            let idx = len / 2;
207            // Safety: we know these indices are valid because we checked len is even and non-zero,
208            // so idx-1 and idx are valid indices into data
209            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
210            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
211            f64::midpoint(v1, v2)
212        }
213        // Odd length case - return the middle element
214        // Safety: we know the index is within bounds
215        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
216    })
217}
218
219fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
220where
221    T: Sync + PartialOrd + ToPrimitive,
222{
223    if data.is_empty() {
224        core::hint::cold_path();
225        return None;
226    }
227    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
228
229    // Use adaptive parallel processing based on data size
230    let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
231        // Sequential processing for small datasets
232        // Iterator collect enables TrustedLen optimization, eliminating per-element bounds checks
233        data.iter()
234            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
235            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
236            .collect()
237    } else {
238        // Parallel processing for large datasets
239        data.par_iter()
240            // SAFETY: to_f64() always returns Some for standard numeric types
241            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
242            .collect()
243    };
244
245    // Use select_nth_unstable to find the median in O(n) instead of O(n log n) full sort
246    let len = abs_diff_vec.len();
247    let mid = len / 2;
248    let cmp = |a: &f64, b: &f64| a.total_cmp(b);
249
250    abs_diff_vec.select_nth_unstable_by(mid, cmp);
251
252    if len.is_multiple_of(2) {
253        // Even length: need both mid-1 and mid elements
254        let right = abs_diff_vec[mid];
255        // The left partition [0..mid] contains elements <= abs_diff_vec[mid],
256        // so we can find the max of the left partition for mid-1
257        let left = abs_diff_vec[..mid]
258            .iter()
259            .max_by(|a, b| cmp(a, b))
260            .copied()?;
261        Some(f64::midpoint(left, right))
262    } else {
263        Some(abs_diff_vec[mid])
264    }
265}
266
267fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
268where
269    T: Sync + PartialOrd + ToPrimitive,
270{
271    let len = data.len();
272
273    // Early return for empty data
274    if len == 0 {
275        core::hint::cold_path();
276        return None;
277    }
278
279    // Single element case: perfect equality, Gini = 0
280    if len == 1 {
281        core::hint::cold_path();
282        return Some(0.0);
283    }
284
285    // Gini coefficient is only defined for non-negative distributions.
286    // Since data is sorted, check the first (smallest) element.
287    // SAFETY: len > 1 guaranteed by checks above
288    let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
289    if first_val < 0.0 {
290        core::hint::cold_path();
291        return None;
292    }
293
294    // Compute sum and weighted sum.
295    // When precalc_sum is provided, only compute weighted_sum in a single pass.
296    // When not provided, fuse both computations into a single pass over the data
297    // to halve cache pressure (following the fold/reduce pattern used in kurtosis).
298    let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
299        if precalc < 0.0 {
300            core::hint::cold_path();
301            return None;
302        }
303        // Only need weighted_sum — single pass
304        let weighted_sum = if len < PARALLEL_THRESHOLD {
305            let mut weighted_sum = 0.0;
306            for (i, x) in data.iter().enumerate() {
307                // SAFETY: to_f64() always returns Some for standard numeric types
308                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
309                weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
310            }
311            weighted_sum
312        } else {
313            data.par_iter()
314                .enumerate()
315                .map(|(i, x)| {
316                    // SAFETY: to_f64() always returns Some for standard numeric types
317                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
318                    (i + 1) as f64 * val
319                })
320                .sum()
321        };
322        (precalc, weighted_sum)
323    } else if len < PARALLEL_THRESHOLD {
324        // Fused single pass: compute both sum and weighted_sum together
325        let mut sum = 0.0;
326        let mut weighted_sum = 0.0;
327        for (i, x) in data.iter().enumerate() {
328            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
329            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
330            sum += val;
331            weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
332        }
333        (sum, weighted_sum)
334    } else {
335        // Fused parallel single pass using fold/reduce
336        data.par_iter()
337            .enumerate()
338            .fold(
339                || (0.0_f64, 0.0_f64),
340                |acc, (i, x)| {
341                    // SAFETY: to_f64() always returns Some for standard numeric types
342                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
343                    (acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
344                },
345            )
346            .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
347    };
348
349    // If sum is zero, Gini is undefined
350    if sum == 0.0 {
351        core::hint::cold_path();
352        return None;
353    }
354
355    // Compute Gini coefficient using the formula:
356    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
357    // where i is 1-indexed rank and y_i are sorted values
358    let n = len as f64;
359    let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
360
361    Some(gini)
362}
363
364fn kurtosis_on_sorted<T>(
365    data: &[Partial<T>],
366    precalc_mean: Option<f64>,
367    precalc_variance: Option<f64>,
368) -> Option<f64>
369where
370    T: Sync + PartialOrd + ToPrimitive,
371{
372    let len = data.len();
373
374    // Need at least 4 elements for meaningful kurtosis
375    if len < 4 {
376        core::hint::cold_path();
377        return None;
378    }
379
380    // Use pre-calculated mean if provided, otherwise compute it
381    let mean = precalc_mean.unwrap_or_else(|| {
382        let sum: f64 = if len < PARALLEL_THRESHOLD {
383            // Iterator sum enables auto-vectorization (SIMD) by the compiler
384            data.iter()
385                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
386                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
387                .sum()
388        } else {
389            data.par_iter()
390                // SAFETY: to_f64() always returns Some for standard numeric types
391                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
392                .sum()
393        };
394        sum / len as f64
395    });
396
397    // Compute variance_sq and fourth_power_sum
398    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
399    // Otherwise, we need to compute variance from the data
400    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
401        // Negative variance is invalid (possible floating-point rounding artifact)
402        if variance < 0.0 {
403            core::hint::cold_path();
404            return None;
405        }
406        // Use pre-calculated variance: variance_sq = variance^2
407        let variance_sq = variance * variance;
408
409        // Still need to compute fourth_power_sum
410        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
411            let mut sum = 0.0;
412            for x in data {
413                // SAFETY: to_f64() always returns Some for standard numeric types
414                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
415                let diff = val - mean;
416                let diff_sq = diff * diff;
417                sum = diff_sq.mul_add(diff_sq, sum);
418            }
419            sum
420        } else {
421            data.par_iter()
422                .map(|x| {
423                    // SAFETY: to_f64() always returns Some for standard numeric types
424                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
425                    let diff = val - mean;
426                    let diff_sq = diff * diff;
427                    diff_sq * diff_sq
428                })
429                .sum()
430        };
431
432        (variance_sq, fourth_power_sum)
433    } else {
434        // Compute both variance_sum and fourth_power_sum
435        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
436            let mut variance_sum = 0.0;
437            let mut fourth_power_sum = 0.0;
438
439            for x in data {
440                // SAFETY: to_f64() always returns Some for standard numeric types
441                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
442                let diff = val - mean;
443                let diff_sq = diff * diff;
444                variance_sum += diff_sq;
445                fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
446            }
447
448            (variance_sum, fourth_power_sum)
449        } else {
450            // Single pass computing both sums simultaneously
451            data.par_iter()
452                .fold(
453                    || (0.0_f64, 0.0_f64),
454                    |acc, x| {
455                        // SAFETY: to_f64() always returns Some for standard numeric types
456                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
457                        let diff = val - mean;
458                        let diff_sq = diff * diff;
459                        (acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
460                    },
461                )
462                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
463        };
464
465        let variance = variance_sum / len as f64;
466
467        // If variance is zero, all values are the same, kurtosis is undefined
468        if variance == 0.0 {
469            core::hint::cold_path();
470            return None;
471        }
472
473        let variance_sq = variance * variance;
474        (variance_sq, fourth_power_sum)
475    };
476
477    // If variance_sq is zero, all values are the same, kurtosis is undefined
478    if variance_sq == 0.0 {
479        core::hint::cold_path();
480        return None;
481    }
482
483    let n = len as f64;
484
485    // Sample excess kurtosis formula:
486    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
487    let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
488    let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
489    let kurtosis =
490        (n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
491
492    Some(kurtosis)
493}
494
495fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
496where
497    T: PartialOrd + ToPrimitive,
498    V: PartialOrd + ToPrimitive,
499{
500    let len = data.len();
501
502    if len == 0 {
503        core::hint::cold_path();
504        return None;
505    }
506
507    let value_f64 = value.to_f64()?;
508
509    // Binary search to find the position where value would be inserted
510    // This gives us the number of values <= value
511    let count_leq = data.binary_search_by(|x| {
512        x.0.to_f64()
513            .unwrap_or(f64::NAN)
514            .partial_cmp(&value_f64)
515            .unwrap_or(std::cmp::Ordering::Less)
516    });
517
518    let count = match count_leq {
519        Ok(idx) => {
520            // Value found at idx — use partition_point (O(log n)) to find the upper bound
521            // of equal values instead of a linear scan
522            let upper = data[idx + 1..].partition_point(|x| {
523                x.0.to_f64()
524                    .is_some_and(|v| v.total_cmp(&value_f64).is_le())
525            });
526            idx + 1 + upper
527        }
528        Err(idx) => idx, // Number of values less than value
529    };
530
531    // Percentile rank = (count / n) * 100
532    Some((count as f64 / len as f64) * 100.0)
533}
534
535fn atkinson_on_sorted<T>(
536    data: &[Partial<T>],
537    epsilon: f64,
538    precalc_mean: Option<f64>,
539    precalc_geometric_sum: Option<f64>,
540) -> Option<f64>
541where
542    T: Sync + PartialOrd + ToPrimitive,
543{
544    let len = data.len();
545
546    // Early return for empty data
547    if len == 0 {
548        core::hint::cold_path();
549        return None;
550    }
551
552    // Single element case: perfect equality, Atkinson = 0
553    if len == 1 {
554        core::hint::cold_path();
555        return Some(0.0);
556    }
557
558    // Epsilon must be non-negative
559    if epsilon < 0.0 {
560        core::hint::cold_path();
561        return None;
562    }
563
564    // Use pre-calculated mean if provided, otherwise compute it
565    let mean = precalc_mean.unwrap_or_else(|| {
566        let sum: f64 = if len < PARALLEL_THRESHOLD {
567            // Iterator sum enables auto-vectorization (SIMD) by the compiler
568            data.iter()
569                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
570                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
571                .sum()
572        } else {
573            data.par_iter()
574                // SAFETY: to_f64() always returns Some for standard numeric types
575                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
576                .sum()
577        };
578        sum / len as f64
579    });
580
581    // If mean is zero, Atkinson is undefined
582    if mean == 0.0 {
583        core::hint::cold_path();
584        return None;
585    }
586
587    // Handle special case: epsilon = 1 (uses geometric mean)
588    if (epsilon - 1.0).abs() < 1e-10 {
589        // A_1 = 1 - (geometric_mean / mean)
590        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
591            precalc
592        } else if len < PARALLEL_THRESHOLD {
593            let mut sum = 0.0;
594            for x in data {
595                // SAFETY: to_f64() always returns Some for standard numeric types
596                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
597                if val <= 0.0 {
598                    // Geometric mean undefined for non-positive values
599                    return None;
600                }
601                sum += val.ln();
602            }
603            sum
604        } else {
605            data.par_iter()
606                .map(|x| {
607                    // SAFETY: to_f64() always returns Some for standard numeric types
608                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
609                    if val <= 0.0 {
610                        return f64::NAN;
611                    }
612                    val.ln()
613                })
614                .sum()
615        };
616
617        if geometric_sum.is_nan() {
618            core::hint::cold_path();
619            return None;
620        }
621
622        let geometric_mean = (geometric_sum / len as f64).exp();
623        return Some(1.0 - geometric_mean / mean);
624    }
625
626    // General case: epsilon != 1
627    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
628    let exponent = 1.0 - epsilon;
629
630    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
631        let mut sum = 0.0;
632        for x in data {
633            // SAFETY: to_f64() always returns Some for standard numeric types
634            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
635            if val < 0.0 {
636                // Negative values with non-integer exponent are undefined
637                return None;
638            }
639            let ratio = val / mean;
640            sum += ratio.powf(exponent);
641        }
642        sum
643    } else {
644        data.par_iter()
645            .map(|x| {
646                // SAFETY: to_f64() always returns Some for standard numeric types
647                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
648                if val < 0.0 {
649                    return f64::NAN;
650                }
651                let ratio = val / mean;
652                ratio.powf(exponent)
653            })
654            .sum()
655    };
656
657    if sum_powered.is_nan() || sum_powered <= 0.0 {
658        core::hint::cold_path();
659        return None;
660    }
661
662    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
663    Some(atkinson)
664}
665
666/// Selection algorithm to find the k-th smallest element in O(n) average time.
667/// This is an implementation of quickselect algorithm.
668#[cfg(test)]
669fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
670where
671    T: PartialOrd,
672{
673    if data.is_empty() || k >= data.len() {
674        core::hint::cold_path();
675        return None;
676    }
677
678    let mut left = 0;
679    let mut right = data.len() - 1;
680
681    loop {
682        if left == right {
683            return Some(&data[left].0);
684        }
685
686        // Use median-of-three pivot selection for better performance
687        let pivot_idx = median_of_three_pivot(data, left, right);
688        let pivot_idx = partition(data, left, right, pivot_idx);
689
690        match k.cmp(&pivot_idx) {
691            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
692            std::cmp::Ordering::Less => right = pivot_idx - 1,
693            std::cmp::Ordering::Greater => left = pivot_idx + 1,
694        }
695    }
696}
697
698/// Select the median of three elements as pivot for better quickselect performance
699#[cfg(test)]
700fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
701where
702    T: PartialOrd,
703{
704    let mid = left + (right - left) / 2;
705
706    if data[left] <= data[mid] {
707        if data[mid] <= data[right] {
708            mid
709        } else if data[left] <= data[right] {
710            right
711        } else {
712            left
713        }
714    } else if data[left] <= data[right] {
715        left
716    } else if data[mid] <= data[right] {
717        right
718    } else {
719        mid
720    }
721}
722
723/// Partition function for quickselect
724#[cfg(test)]
725fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
726where
727    T: PartialOrd,
728{
729    // Move pivot to end
730    data.swap(pivot_idx, right);
731    let mut store_idx = left;
732
733    // Move all elements smaller than pivot to the left
734    // Cache pivot position for better cache locality (access data[right] directly each time)
735    for i in left..right {
736        // Safety: i, store_idx, and right are guaranteed to be in bounds
737        // Compare directly with pivot at data[right] - compiler should optimize this access
738        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
739            data.swap(i, store_idx);
740            store_idx += 1;
741        }
742    }
743
744    // Move pivot to its final place
745    data.swap(store_idx, right);
746    store_idx
747}
748
749// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
750// It divides data into quarters based on the length n = 4k + r where r is remainder.
751// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
752fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
753where
754    T: PartialOrd + ToPrimitive,
755{
756    let len = data.len();
757
758    // Early return for small arrays
759    match len {
760        0..=2 => {
761            core::hint::cold_path();
762            return None;
763        }
764        3 => {
765            return Some(
766                // SAFETY: We know these indices are valid because len == 3
767                unsafe {
768                    (
769                        data.get_unchecked(0).0.to_f64()?,
770                        data.get_unchecked(1).0.to_f64()?,
771                        data.get_unchecked(2).0.to_f64()?,
772                    )
773                },
774            );
775        }
776        _ => {}
777    }
778
779    // Calculate k and remainder in one division
780    let k = len / 4;
781    let remainder = len % 4;
782
783    // SAFETY: All index calculations below are guaranteed to be in bounds
784    // because we've verified len >= 4 above and k is len/4
785    unsafe {
786        Some(match remainder {
787            0 => {
788                // Let data = {x_i}_{i=0..4k} where k is positive integer.
789                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
790                // If we divide data into two parts {x_i < q2} as L and
791                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
792                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
793                // =============
794                // Simply put: Length is multiple of 4 (4k)
795                // q1 = (x_{k-1} + x_k) / 2
796                // q2 = (x_{2k-1} + x_{2k}) / 2
797                // q3 = (x_{3k-1} + x_{3k}) / 2
798                let q1 = f64::midpoint(
799                    data.get_unchecked(k - 1).0.to_f64()?,
800                    data.get_unchecked(k).0.to_f64()?,
801                );
802                let q2 = f64::midpoint(
803                    data.get_unchecked(2 * k - 1).0.to_f64()?,
804                    data.get_unchecked(2 * k).0.to_f64()?,
805                );
806                let q3 = f64::midpoint(
807                    data.get_unchecked(3 * k - 1).0.to_f64()?,
808                    data.get_unchecked(3 * k).0.to_f64()?,
809                );
810                (q1, q2, q3)
811            }
812            1 => {
813                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
814                // Median q2 = x_{2k}.
815                // If we divide data other than q2 into two parts {x_i < q2}
816                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
817                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
818                // =============
819                // Simply put: Length is 4k + 1
820                // q1 = (x_{k-1} + x_k) / 2
821                // q2 = x_{2k}
822                // q3 = (x_{3k} + x_{3k+1}) / 2
823                let q1 = f64::midpoint(
824                    data.get_unchecked(k - 1).0.to_f64()?,
825                    data.get_unchecked(k).0.to_f64()?,
826                );
827                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
828                let q3 = f64::midpoint(
829                    data.get_unchecked(3 * k).0.to_f64()?,
830                    data.get_unchecked(3 * k + 1).0.to_f64()?,
831                );
832                (q1, q2, q3)
833            }
834            2 => {
835                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
836                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
837                // If we divide data into two parts {x_i < q2} as L and
838                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
839                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
840                // =============
841                // Simply put: Length is 4k + 2
842                // q1 = x_k
843                // q2 = (x_{2k} + x_{2k+1}) / 2
844                // q3 = x_{3k+1}
845                let q1 = data.get_unchecked(k).0.to_f64()?;
846                let q2 = f64::midpoint(
847                    data.get_unchecked(2 * k).0.to_f64()?,
848                    data.get_unchecked(2 * k + 1).0.to_f64()?,
849                );
850                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
851                (q1, q2, q3)
852            }
853            _ => {
854                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
855                // Median q2 = x_{2k+1}.
856                // If we divide data other than q2 into two parts {x_i < q2}
857                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
858                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
859                // =============
860                // Simply put: Length is 4k + 3
861                // q1 = x_k
862                // q2 = x_{2k+1}
863                // q3 = x_{3k+2}
864                let q1 = data.get_unchecked(k).0.to_f64()?;
865                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
866                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
867                (q1, q2, q3)
868            }
869        })
870    }
871}
872
873/// Zero-copy quartiles computation using index-based selection.
874/// This avoids copying data by working with an array of indices.
875///
876/// Uses `select_nth_unstable_by` on the indices array, which partitions in-place.
877/// After selecting position p, elements at indices [0..p] are <= the p-th element
878/// and elements at [p+1..] are >= it. By selecting positions in ascending order,
879/// each subsequent selection only needs to search within the right partition,
880/// avoiding redundant O(n) resets.
881fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
882where
883    T: PartialOrd + ToPrimitive,
884{
885    let len = data.len();
886
887    // Early return for small arrays
888    match len {
889        0..=2 => {
890            core::hint::cold_path();
891            return None;
892        }
893        3 => {
894            let mut indices: Vec<usize> = (0..3).collect();
895            let cmp = |a: &usize, b: &usize| {
896                data[*a]
897                    .partial_cmp(&data[*b])
898                    .unwrap_or(std::cmp::Ordering::Less)
899            };
900            indices.sort_unstable_by(cmp);
901            let min_val = data[indices[0]].0.to_f64()?;
902            let med_val = data[indices[1]].0.to_f64()?;
903            let max_val = data[indices[2]].0.to_f64()?;
904            return Some((min_val, med_val, max_val));
905        }
906        _ => {}
907    }
908
909    let k = len / 4;
910    let remainder = len % 4;
911
912    let mut indices: Vec<usize> = (0..len).collect();
913    let cmp = |a: &usize, b: &usize| {
914        data[*a]
915            .partial_cmp(&data[*b])
916            .unwrap_or(std::cmp::Ordering::Less)
917    };
918
919    // Collect the unique positions we need in ascending order.
920    // By selecting in ascending order, each select_nth_unstable_by partitions
921    // the array so subsequent selections operate on progressively smaller slices.
922    // We deduplicate because adjacent quartile boundaries can overlap for small k.
923    let raw_positions: Vec<usize> = match remainder {
924        0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
925        1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
926        2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
927        _ => vec![k, 2 * k + 1, 3 * k + 2],
928    };
929
930    let mut unique_positions = raw_positions.clone();
931    unique_positions.dedup();
932
933    // Select each unique position in ascending order, narrowing the search range
934    let mut start = 0;
935    for &pos in &unique_positions {
936        indices[start..].select_nth_unstable_by(pos - start, &cmp);
937        start = pos + 1;
938    }
939
940    // Now read all needed values (including duplicates) from the partitioned indices
941    let values: Vec<f64> = raw_positions
942        .iter()
943        .map(|&pos| data[indices[pos]].0.to_f64())
944        .collect::<Option<Vec<_>>>()?;
945
946    match remainder {
947        0 => {
948            let q1 = f64::midpoint(values[0], values[1]);
949            let q2 = f64::midpoint(values[2], values[3]);
950            let q3 = f64::midpoint(values[4], values[5]);
951            Some((q1, q2, q3))
952        }
953        1 => {
954            let q1 = f64::midpoint(values[0], values[1]);
955            let q2 = values[2];
956            let q3 = f64::midpoint(values[3], values[4]);
957            Some((q1, q2, q3))
958        }
959        2 => {
960            let q1 = values[0];
961            let q2 = f64::midpoint(values[1], values[2]);
962            let q3 = values[3];
963            Some((q1, q2, q3))
964        }
965        _ => Some((values[0], values[1], values[2])),
966    }
967}
968
969fn mode_on_sorted<T, I>(it: I) -> Option<T>
970where
971    T: PartialOrd,
972    I: Iterator<Item = T>,
973{
974    use std::cmp::Ordering;
975
976    // This approach to computing the mode works very nicely when the
977    // number of samples is large and is close to its cardinality.
978    // In other cases, a hashmap would be much better.
979    // But really, how can we know this when given an arbitrary stream?
980    // Might just switch to a hashmap to track frequencies. That would also
981    // be generally useful for discovering the cardinality of a sample.
982    let (mut mode, mut next) = (None, None);
983    let (mut mode_count, mut next_count) = (0usize, 0usize);
984    for x in it {
985        if mode.as_ref() == Some(&x) {
986            mode_count += 1;
987        } else if next.as_ref() == Some(&x) {
988            next_count += 1;
989        } else {
990            next = Some(x);
991            next_count = 0;
992        }
993
994        match next_count.cmp(&mode_count) {
995            Ordering::Greater => {
996                mode = next;
997                mode_count = next_count;
998                next = None;
999                next_count = 0;
1000            }
1001            Ordering::Equal => {
1002                mode = None;
1003                mode_count = 0;
1004            }
1005            Ordering::Less => {}
1006        }
1007    }
1008    mode
1009}
1010
1011/// Computes both modes and antimodes from a sorted slice of values.
1012/// This version works with references to avoid unnecessary cloning.
1013///
1014/// # Arguments
1015///
1016/// * `data` - A sorted slice of values
1017///
1018/// # Notes
1019///
1020/// - Mode is the most frequently occurring value(s)
1021/// - Antimode is the least frequently occurring value(s)
1022/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1023/// - For empty slices, returns empty vectors and zero counts
1024/// - For single value slices, returns that value as the mode and empty antimode
1025/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1026///
1027/// # Returns
1028///
1029/// A tuple containing:
1030/// * Modes information: `(Vec<T>, usize, u32)` where:
1031///   - Vec<T>: Vector containing the mode values
1032///   - usize: Number of modes found
1033///   - u32: Frequency/count of the mode values
1034/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1035///   - Vec<T>: Vector containing up to 10 antimode values
1036///   - usize: Total number of antimodes
1037///   - u32: Frequency/count of the antimode values
1038#[allow(clippy::type_complexity)]
1039#[inline]
1040fn modes_and_antimodes_on_sorted_slice<T>(
1041    data: &[Partial<T>],
1042) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1043where
1044    T: PartialOrd + Clone,
1045{
1046    let size = data.len();
1047
1048    // Early return for empty slice
1049    if size == 0 {
1050        core::hint::cold_path();
1051        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1052    }
1053
1054    // Estimate capacity using integer square root of size
1055    let sqrt_size = size.isqrt();
1056    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1057
1058    let mut current_value = &data[0].0;
1059    let mut current_count = 1;
1060    let mut highest_count = 1;
1061    let mut lowest_count = u32::MAX;
1062
1063    // Count consecutive runs - optimized to reduce allocations
1064    for x in data.iter().skip(1) {
1065        if x.0 == *current_value {
1066            current_count += 1;
1067            highest_count = highest_count.max(current_count);
1068        } else {
1069            runs.push((current_value, current_count));
1070            lowest_count = lowest_count.min(current_count);
1071            current_value = &x.0;
1072            current_count = 1;
1073        }
1074    }
1075    runs.push((current_value, current_count));
1076    lowest_count = lowest_count.min(current_count);
1077
1078    // Early return if only one unique value
1079    if runs.len() == 1 {
1080        let (val, count) = runs.pop().unwrap();
1081        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1082    }
1083
1084    // Special case: if all values appear exactly once
1085    if highest_count == 1 {
1086        let antimodes_count = runs.len().min(10);
1087        let total_count = runs.len();
1088        let mut antimodes = Vec::with_capacity(antimodes_count);
1089        for (val, _) in runs.into_iter().take(antimodes_count) {
1090            antimodes.push(val.clone());
1091        }
1092        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1093        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1094    }
1095
1096    // Collect modes and antimodes directly in a single pass, cloning values immediately
1097    // instead of collecting indices first and then cloning in a second pass
1098    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1099    let estimated_antimodes = 10.min(runs.len());
1100
1101    let mut modes_result = Vec::with_capacity(estimated_modes);
1102    let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
1103    let mut mode_count = 0;
1104    let mut antimodes_count = 0;
1105    let mut antimodes_collected = 0_u32;
1106
1107    for (val, count) in &runs {
1108        if *count == highest_count {
1109            modes_result.push((*val).clone());
1110            mode_count += 1;
1111        }
1112        if *count == lowest_count {
1113            antimodes_count += 1;
1114            if antimodes_collected < 10 {
1115                antimodes_result.push((*val).clone());
1116                antimodes_collected += 1;
1117            }
1118        }
1119    }
1120
1121    (
1122        (modes_result, mode_count, highest_count),
1123        (antimodes_result, antimodes_count, lowest_count),
1124    )
1125}
1126
1127/// A commutative data structure for lazily sorted sequences of data.
1128///
1129/// The sort does not occur until statistics need to be computed.
1130///
1131/// Note that this works on types that do not define a total ordering like
1132/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1133/// is returned.
1134#[allow(clippy::unsafe_derive_deserialize)]
1135#[derive(Clone, Serialize, Deserialize)]
1136pub struct Unsorted<T> {
1137    /// Internal cache flag indicating whether `data` is currently sorted.
1138    /// This field is skipped during serialization and deserialization.
1139    #[serde(skip)]
1140    sorted: bool,
1141    data: Vec<Partial<T>>,
1142}
1143
1144// Manual PartialEq/Eq: ignore `sorted` cache flag so equality reflects
1145// logical contents only (two Unsorted with same data compare equal
1146// regardless of whether one has been sorted).
1147impl<T: PartialEq> PartialEq for Unsorted<T> {
1148    fn eq(&self, other: &Self) -> bool {
1149        self.data == other.data
1150    }
1151}
1152
1153impl<T: PartialEq> Eq for Unsorted<T> where Partial<T>: Eq {}
1154
1155impl<T: PartialOrd + Send> Unsorted<T> {
1156    /// Create initial empty state.
1157    #[inline]
1158    #[must_use]
1159    pub fn new() -> Unsorted<T> {
1160        Default::default()
1161    }
1162
1163    /// Add a new element to the set.
1164    #[allow(clippy::inline_always)]
1165    #[inline(always)]
1166    pub fn add(&mut self, v: T) {
1167        self.sorted = false;
1168        self.data.push(Partial(v));
1169    }
1170
1171    /// Return the number of data points.
1172    #[inline]
1173    #[must_use]
1174    pub const fn len(&self) -> usize {
1175        self.data.len()
1176    }
1177
1178    #[inline]
1179    #[must_use]
1180    pub const fn is_empty(&self) -> bool {
1181        self.data.is_empty()
1182    }
1183
1184    #[inline]
1185    fn sort(&mut self) {
1186        if !self.sorted {
1187            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1188            if self.data.len() < PARALLEL_THRESHOLD {
1189                self.data.sort_unstable();
1190            } else {
1191                self.data.par_sort_unstable();
1192            }
1193            self.sorted = true;
1194        }
1195    }
1196
1197    #[inline]
1198    const fn already_sorted(&mut self) {
1199        self.sorted = true;
1200    }
1201
1202    /// Add multiple elements efficiently
1203    #[inline]
1204    pub fn add_bulk(&mut self, values: Vec<T>) {
1205        self.sorted = false;
1206        self.data.reserve(values.len());
1207        self.data.extend(values.into_iter().map(Partial));
1208    }
1209
1210    /// Shrink capacity to fit current data
1211    #[inline]
1212    pub fn shrink_to_fit(&mut self) {
1213        self.data.shrink_to_fit();
1214    }
1215
1216    /// Create with specific capacity
1217    #[inline]
1218    #[must_use]
1219    pub fn with_capacity(capacity: usize) -> Self {
1220        Unsorted {
1221            sorted: true,
1222            data: Vec::with_capacity(capacity),
1223        }
1224    }
1225
1226    /// Add a value assuming it's greater than all existing values
1227    #[inline]
1228    pub fn push_ascending(&mut self, value: T) {
1229        if let Some(last) = self.data.last() {
1230            debug_assert!(last.0 <= value, "Value must be >= than last element");
1231        }
1232        self.data.push(Partial(value));
1233        // Data remains sorted
1234    }
1235}
1236
1237impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
1238    #[inline]
1239    /// Returns the cardinality of the data.
1240    /// Set `sorted` to `true` if the data is already sorted.
1241    /// Set `parallel_threshold` to `0` to force sequential processing.
1242    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1243    /// Set `parallel_threshold` to `2` to force parallel processing.
1244    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1245    /// greater than the default threshold of `10_000`.
1246    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1247        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1248        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1249
1250        let len = self.data.len();
1251        match len {
1252            0 => return 0,
1253            1 => return 1,
1254            _ => {}
1255        }
1256
1257        if sorted {
1258            self.already_sorted();
1259        } else {
1260            self.sort();
1261        }
1262
1263        let use_parallel = parallel_threshold != 0
1264            && (parallel_threshold == 1
1265                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1266
1267        if use_parallel {
1268            // Parallel processing using chunks
1269            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1270            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1271            let chunk_info: ChunkInfo<'_, T> = self
1272                .data
1273                .par_chunks(CHUNK_SIZE)
1274                .map(|chunk| {
1275                    // Count unique elements within this chunk
1276                    let mut count = u64::from(!chunk.is_empty());
1277                    for [a, b] in chunk.array_windows::<2>() {
1278                        if a != b {
1279                            count += 1;
1280                        }
1281                    }
1282                    (count, chunk.first(), chunk.last())
1283                })
1284                .collect();
1285
1286            // Combine results, checking boundaries between chunks
1287            let mut total = 0;
1288            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1289                total += count;
1290
1291                // Check boundary with previous chunk
1292                if i > 0
1293                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1294                    && prev_last == curr_first
1295                {
1296                    total -= 1; // Deduct 1 if boundary values are equal
1297                }
1298            }
1299
1300            total
1301        } else {
1302            // Sequential processing
1303
1304            // the statement below is equivalent to:
1305            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1306            let mut count = u64::from(!self.data.is_empty());
1307
1308            for [a, b] in self.data.array_windows::<2>() {
1309                if a != b {
1310                    count += 1;
1311                }
1312            }
1313            count
1314        }
1315    }
1316}
1317
1318impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1319    /// Returns the mode of the data.
1320    #[inline]
1321    pub fn mode(&mut self) -> Option<T> {
1322        if self.data.is_empty() {
1323            return None;
1324        }
1325        self.sort();
1326        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1327    }
1328
1329    /// Returns the modes of the data.
1330    /// Note that there is also a `frequency::mode()` function that return one mode
1331    /// with the highest frequency. If there is a tie, it returns None.
1332    #[inline]
1333    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1334        if self.data.is_empty() {
1335            return (Vec::new(), 0, 0);
1336        }
1337        self.sort();
1338        modes_and_antimodes_on_sorted_slice(&self.data).0
1339    }
1340
1341    /// Returns the antimodes of the data.
1342    /// `antimodes_result` only returns the first 10 antimodes
1343    #[inline]
1344    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1345        if self.data.is_empty() {
1346            return (Vec::new(), 0, 0);
1347        }
1348        self.sort();
1349        modes_and_antimodes_on_sorted_slice(&self.data).1
1350    }
1351
1352    /// Returns the modes and antimodes of the data.
1353    /// `antimodes_result` only returns the first 10 antimodes
1354    #[allow(clippy::type_complexity)]
1355    #[inline]
1356    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1357        if self.data.is_empty() {
1358            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1359        }
1360        self.sort();
1361        modes_and_antimodes_on_sorted_slice(&self.data)
1362    }
1363}
1364
1365impl Unsorted<Vec<u8>> {
1366    /// Add a byte slice, converting to `Vec<u8>` internally.
1367    ///
1368    /// This is a convenience method that avoids requiring the caller to call
1369    /// `.to_vec()` before `add()`. The allocation still occurs internally,
1370    /// but the API is cleaner and opens the door for future optimizations
1371    /// (e.g., frequency-map deduplication for high-cardinality data).
1372    #[allow(clippy::inline_always)]
1373    #[inline(always)]
1374    pub fn add_bytes(&mut self, v: &[u8]) {
1375        self.sorted = false;
1376        self.data.push(Partial(v.to_vec()));
1377    }
1378}
1379
1380impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1381    /// Returns the median of the data.
1382    #[inline]
1383    pub fn median(&mut self) -> Option<f64> {
1384        if self.data.is_empty() {
1385            return None;
1386        }
1387        self.sort();
1388        median_on_sorted(&self.data)
1389    }
1390}
1391
1392impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1393    /// Returns the Median Absolute Deviation (MAD) of the data.
1394    #[inline]
1395    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1396        if self.data.is_empty() {
1397            return None;
1398        }
1399        if existing_median.is_none() {
1400            self.sort();
1401        }
1402        mad_on_sorted(&self.data, existing_median)
1403    }
1404}
1405
1406impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1407    /// Returns the quartiles of the data using the traditional sorting approach.
1408    ///
1409    /// This method sorts the data first and then computes quartiles.
1410    /// Time complexity: O(n log n)
1411    #[inline]
1412    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1413        if self.data.is_empty() {
1414            return None;
1415        }
1416        self.sort();
1417        quartiles_on_sorted(&self.data)
1418    }
1419}
1420
1421impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1422    /// Returns the Gini Coefficient of the data.
1423    ///
1424    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1425    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1426    /// Time complexity: O(n log n)
1427    #[inline]
1428    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1429        if self.data.is_empty() {
1430            return None;
1431        }
1432        self.sort();
1433        gini_on_sorted(&self.data, precalc_sum)
1434    }
1435
1436    /// Returns the kurtosis (excess kurtosis) of the data.
1437    ///
1438    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1439    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1440    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1441    /// Time complexity: O(n log n)
1442    #[inline]
1443    pub fn kurtosis(
1444        &mut self,
1445        precalc_mean: Option<f64>,
1446        precalc_variance: Option<f64>,
1447    ) -> Option<f64> {
1448        if self.data.is_empty() {
1449            return None;
1450        }
1451        self.sort();
1452        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1453    }
1454
1455    /// Returns the percentile rank of a value in the data.
1456    ///
1457    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1458    /// values, returns 0.0. If greater than all, returns 100.0.
1459    /// This method sorts the data first and then computes the percentile rank.
1460    /// Time complexity: O(n log n)
1461    #[inline]
1462    #[allow(clippy::needless_pass_by_value)]
1463    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1464    where
1465        V: PartialOrd + ToPrimitive,
1466    {
1467        if self.data.is_empty() {
1468            return None;
1469        }
1470        self.sort();
1471        percentile_rank_on_sorted(&self.data, &value)
1472    }
1473
1474    /// Returns the Atkinson Index of the data.
1475    ///
1476    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1477    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1478    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1479    /// This method sorts the data first and then computes the Atkinson index.
1480    /// Time complexity: O(n log n)
1481    ///
1482    /// # Arguments
1483    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1484    ///   - 0.0: No inequality aversion (returns 0)
1485    ///   - 0.5: Moderate aversion
1486    ///   - 1.0: Uses geometric mean (special case)
1487    ///   - 2.0: High aversion
1488    /// * `precalc_mean` - Optional pre-calculated mean
1489    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1490    #[inline]
1491    pub fn atkinson(
1492        &mut self,
1493        epsilon: f64,
1494        precalc_mean: Option<f64>,
1495        precalc_geometric_sum: Option<f64>,
1496    ) -> Option<f64> {
1497        if self.data.is_empty() {
1498            return None;
1499        }
1500        self.sort();
1501        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1502    }
1503}
1504
1505impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
1506    /// Returns the quartiles of the data using selection algorithm.
1507    ///
1508    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1509    /// in O(n) average time complexity instead of O(n log n) sorting.
1510    /// Requires T to implement Clone to create a working copy of the data.
1511    ///
1512    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1513    /// is often slower than the sorting-based approach for small to medium datasets due to:
1514    /// - Need to find multiple order statistics (3 separate quickselect calls)
1515    /// - Overhead of copying data to avoid mutation
1516    /// - Rayon's highly optimized parallel sorting
1517    #[inline]
1518    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1519        if self.data.is_empty() {
1520            return None;
1521        }
1522        // Use zero-copy approach (indices-based) to avoid cloning all elements
1523        quartiles_with_zero_copy_selection(&self.data)
1524    }
1525}
1526
1527impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1528    /// Returns the quartiles using zero-copy selection algorithm.
1529    ///
1530    /// This implementation avoids copying data by working with indices instead,
1531    /// providing better performance than the clone-based selection approach.
1532    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1533    #[inline]
1534    #[must_use]
1535    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1536        if self.data.is_empty() {
1537            return None;
1538        }
1539        quartiles_with_zero_copy_selection(&self.data)
1540    }
1541}
1542
1543impl<T: PartialOrd + Send> Commute for Unsorted<T> {
1544    #[inline]
1545    fn merge(&mut self, mut v: Unsorted<T>) {
1546        if v.is_empty() {
1547            return;
1548        }
1549
1550        self.sorted = false;
1551        // we use std::mem::take to avoid unnecessary allocations
1552        self.data.extend(std::mem::take(&mut v.data));
1553    }
1554}
1555
1556impl<T: PartialOrd> Default for Unsorted<T> {
1557    #[inline]
1558    fn default() -> Unsorted<T> {
1559        Unsorted {
1560            data: Vec::with_capacity(16),
1561            sorted: true, // empty is sorted
1562        }
1563    }
1564}
1565
1566impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
1567    #[inline]
1568    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1569        let mut v = Unsorted::new();
1570        v.extend(it);
1571        v
1572    }
1573}
1574
1575impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1576    #[inline]
1577    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1578        self.sorted = false;
1579        self.data.extend(it.into_iter().map(Partial));
1580    }
1581}
1582
1583fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1584where
1585    T: PartialOrd + Clone,
1586{
1587    let len = data.len();
1588
1589    // Early return for empty array or invalid percentiles
1590    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1591        return None;
1592    }
1593
1594    // Optimize: Check if percentiles are already sorted and unique
1595    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1596        // Single or empty percentile - no need to sort/dedup
1597        percentiles.to_vec()
1598    } else {
1599        // Check if already sorted and unique (common case)
1600        let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
1601
1602        if is_sorted_unique {
1603            // Already sorted and unique, use directly without cloning
1604            percentiles.to_vec()
1605        } else {
1606            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1607            let mut seen = [false; 101];
1608            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1609            for &p in percentiles {
1610                if !seen[p as usize] {
1611                    seen[p as usize] = true;
1612                    sorted_unique.push(p);
1613                }
1614            }
1615            sorted_unique.sort_unstable();
1616            sorted_unique
1617        }
1618    };
1619
1620    let mut results = Vec::with_capacity(unique_percentiles.len());
1621
1622    // SAFETY: All index calculations below are guaranteed to be in bounds
1623    // because we've verified len > 0 and the rank calculation ensures
1624    // the index is within bounds
1625    unsafe {
1626        for &p in &unique_percentiles {
1627            // Calculate the ordinal rank using nearest-rank method
1628            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1629            // n = ⌈(P/100) × N⌉
1630            #[allow(clippy::cast_sign_loss)]
1631            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1632
1633            // Convert to 0-based index
1634            let idx = rank.saturating_sub(1);
1635
1636            // Get the value at that rank and extract the inner value
1637            results.push(data.get_unchecked(idx).0.clone());
1638        }
1639    }
1640
1641    Some(results)
1642}
1643
1644impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1645    /// Returns the requested percentiles of the data.
1646    ///
1647    /// Uses the nearest-rank method to compute percentiles.
1648    /// Each returned value is an actual value from the dataset.
1649    ///
1650    /// # Arguments
1651    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1652    ///
1653    /// # Returns
1654    /// * `None` if the data is empty or if any percentile is > 100
1655    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1656    ///
1657    /// # Example
1658    /// ```
1659    /// use stats::Unsorted;
1660    /// let mut data = Unsorted::new();
1661    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1662    /// let percentiles = vec![25, 50, 75];
1663    /// let results = data.custom_percentiles(&percentiles).unwrap();
1664    /// assert_eq!(results, vec![3, 5, 8]);
1665    /// ```
1666    #[inline]
1667    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1668        if self.data.is_empty() {
1669            return None;
1670        }
1671        self.sort();
1672        custom_percentiles_on_sorted(&self.data, percentiles)
1673    }
1674}
1675
1676#[cfg(test)]
1677mod test {
1678    use super::*;
1679
1680    #[test]
1681    fn test_cardinality_empty() {
1682        let mut unsorted: Unsorted<i32> = Unsorted::new();
1683        assert_eq!(unsorted.cardinality(false, 1), 0);
1684    }
1685
1686    #[test]
1687    fn test_cardinality_single_element() {
1688        let mut unsorted = Unsorted::new();
1689        unsorted.add(5);
1690        assert_eq!(unsorted.cardinality(false, 1), 1);
1691    }
1692
1693    #[test]
1694    fn test_cardinality_unique_elements() {
1695        let mut unsorted = Unsorted::new();
1696        unsorted.extend(vec![1, 2, 3, 4, 5]);
1697        assert_eq!(unsorted.cardinality(false, 1), 5);
1698    }
1699
1700    #[test]
1701    fn test_cardinality_duplicate_elements() {
1702        let mut unsorted = Unsorted::new();
1703        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1704        assert_eq!(unsorted.cardinality(false, 1), 4);
1705    }
1706
1707    #[test]
1708    fn test_cardinality_all_same() {
1709        let mut unsorted = Unsorted::new();
1710        unsorted.extend(vec![1; 100]);
1711        assert_eq!(unsorted.cardinality(false, 1), 1);
1712    }
1713
1714    #[test]
1715    fn test_cardinality_large_range() {
1716        let mut unsorted = Unsorted::new();
1717        unsorted.extend(0..1_000_000);
1718        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1719    }
1720
1721    #[test]
1722    fn test_cardinality_large_range_sequential() {
1723        let mut unsorted = Unsorted::new();
1724        unsorted.extend(0..1_000_000);
1725        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1726    }
1727
1728    #[test]
1729    fn test_cardinality_presorted() {
1730        let mut unsorted = Unsorted::new();
1731        unsorted.extend(vec![1, 2, 3, 4, 5]);
1732        unsorted.sort();
1733        assert_eq!(unsorted.cardinality(true, 1), 5);
1734    }
1735
1736    #[test]
1737    fn test_cardinality_float() {
1738        let mut unsorted = Unsorted::new();
1739        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1740        assert_eq!(unsorted.cardinality(false, 1), 4);
1741    }
1742
1743    #[test]
1744    fn test_cardinality_string() {
1745        let mut unsorted = Unsorted::new();
1746        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1747        assert_eq!(unsorted.cardinality(false, 1), 3);
1748    }
1749
1750    #[test]
1751    fn test_quartiles_selection_vs_sorted() {
1752        // Test that selection-based quartiles gives same results as sorting-based
1753        let test_cases = vec![
1754            vec![3, 5, 7, 9],
1755            vec![3, 5, 7],
1756            vec![1, 2, 7, 11],
1757            vec![3, 5, 7, 9, 12],
1758            vec![2, 2, 3, 8, 10],
1759            vec![3, 5, 7, 9, 12, 20],
1760            vec![0, 2, 4, 8, 10, 11],
1761            vec![3, 5, 7, 9, 12, 20, 21],
1762            vec![1, 5, 6, 6, 7, 10, 19],
1763        ];
1764
1765        for test_case in test_cases {
1766            let mut unsorted1 = Unsorted::new();
1767            let mut unsorted2 = Unsorted::new();
1768            let mut unsorted3 = Unsorted::new();
1769            unsorted1.extend(test_case.clone());
1770            unsorted2.extend(test_case.clone());
1771            unsorted3.extend(test_case.clone());
1772
1773            let result_sorted = unsorted1.quartiles();
1774            let result_selection = unsorted2.quartiles_with_selection();
1775            let result_zero_copy = unsorted3.quartiles_zero_copy();
1776
1777            assert_eq!(
1778                result_sorted, result_selection,
1779                "Selection mismatch for test case: {:?}",
1780                test_case
1781            );
1782            assert_eq!(
1783                result_sorted, result_zero_copy,
1784                "Zero-copy mismatch for test case: {:?}",
1785                test_case
1786            );
1787        }
1788    }
1789
1790    #[test]
1791    fn test_quartiles_with_selection_small() {
1792        // Test edge cases for selection-based quartiles
1793        let mut unsorted: Unsorted<i32> = Unsorted::new();
1794        assert_eq!(unsorted.quartiles_with_selection(), None);
1795
1796        let mut unsorted = Unsorted::new();
1797        unsorted.extend(vec![1, 2]);
1798        assert_eq!(unsorted.quartiles_with_selection(), None);
1799
1800        let mut unsorted = Unsorted::new();
1801        unsorted.extend(vec![1, 2, 3]);
1802        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1803    }
1804
1805    #[test]
1806    fn test_quickselect() {
1807        let data = vec![
1808            Partial(3),
1809            Partial(1),
1810            Partial(4),
1811            Partial(1),
1812            Partial(5),
1813            Partial(9),
1814            Partial(2),
1815            Partial(6),
1816        ];
1817
1818        // Test finding different positions
1819        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1820        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1821        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1822
1823        // Test edge cases
1824        let mut empty: Vec<Partial<i32>> = vec![];
1825        assert_eq!(quickselect(&mut empty, 0), None);
1826
1827        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1828        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1829    }
1830
1831    #[test]
1832    fn median_stream() {
1833        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1834        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1835    }
1836
1837    #[test]
1838    fn mad_stream() {
1839        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1840        assert_eq!(
1841            mad(
1842                vec![
1843                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1844                    50, 61, 39, 88, 5, 13, 64
1845                ]
1846                .into_iter(),
1847                None
1848            ),
1849            Some(16.0)
1850        );
1851    }
1852
1853    #[test]
1854    fn mad_stream_precalc_median() {
1855        let data = vec![3usize, 5, 7, 9].into_iter();
1856        let median1 = median(data.clone());
1857        assert_eq!(mad(data, median1), Some(2.0));
1858
1859        let data2 = vec![
1860            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1861            39, 88, 5, 13, 64,
1862        ]
1863        .into_iter();
1864        let median2 = median(data2.clone());
1865        assert_eq!(mad(data2, median2), Some(16.0));
1866    }
1867
1868    #[test]
1869    fn mode_stream() {
1870        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1871        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1872        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1873        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1874        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1875    }
1876
1877    #[test]
1878    fn median_floats() {
1879        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1880        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1881    }
1882
1883    #[test]
1884    fn mode_floats() {
1885        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1886        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1887        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1888        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1889        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1890    }
1891
1892    #[test]
1893    fn modes_stream() {
1894        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1895        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1896        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1897        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1898        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1899        let vec: Vec<u32> = vec![];
1900        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1901    }
1902
1903    #[test]
1904    fn modes_floats() {
1905        assert_eq!(
1906            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1907            (vec![], 0, 0)
1908        );
1909        assert_eq!(
1910            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1911            (vec![3.0], 1, 4)
1912        );
1913        assert_eq!(
1914            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1915            (vec![3.0, 4.0], 2, 2)
1916        );
1917        assert_eq!(
1918            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1919            (vec![1.0, 3.0], 2, 2)
1920        );
1921    }
1922
1923    #[test]
1924    fn antimodes_stream() {
1925        assert_eq!(
1926            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1927            (vec![3, 5, 7, 9], 4, 1)
1928        );
1929        assert_eq!(
1930            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1931            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1932        );
1933        assert_eq!(
1934            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1935            (vec![1], 1, 1)
1936        );
1937        assert_eq!(
1938            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1939            (vec![3, 4], 2, 2)
1940        );
1941        assert_eq!(
1942            antimodes(
1943                vec![
1944                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1945                    14, 14, 15, 15
1946                ]
1947                .into_iter()
1948            ),
1949            // we only show the first 10 of the 13 antimodes
1950            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1951        );
1952        assert_eq!(
1953            antimodes(
1954                vec![
1955                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1956                    14, 14, 15, 15
1957                ]
1958                .into_iter()
1959            ),
1960            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1961        );
1962        assert_eq!(
1963            antimodes(vec![3usize, 3, 3, 4].into_iter()),
1964            (vec![4], 1, 1)
1965        );
1966        assert_eq!(
1967            antimodes(vec![4usize, 3, 3, 3].into_iter()),
1968            (vec![4], 1, 1)
1969        );
1970        assert_eq!(
1971            antimodes(vec![1usize, 1, 2, 2].into_iter()),
1972            (vec![1, 2], 2, 2)
1973        );
1974        let vec: Vec<u32> = vec![];
1975        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1976    }
1977
1978    #[test]
1979    fn antimodes_floats() {
1980        assert_eq!(
1981            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1982            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1983        );
1984        assert_eq!(
1985            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1986            (vec![], 0, 0)
1987        );
1988        assert_eq!(
1989            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1990            (vec![3.0, 4.0], 2, 2)
1991        );
1992        assert_eq!(
1993            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1994            (vec![2.0], 1, 1)
1995        );
1996    }
1997
1998    #[test]
1999    fn test_custom_percentiles() {
2000        // Test with integers
2001        let mut unsorted: Unsorted<i32> = Unsorted::new();
2002        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2003
2004        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2005        assert_eq!(result, vec![3, 6, 9]);
2006
2007        // Test with strings
2008        let mut str_data = Unsorted::new();
2009        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2010        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2011        assert_eq!(result, vec!["a", "b", "c", "d"]);
2012
2013        // Test with chars
2014        let mut char_data = Unsorted::new();
2015        char_data.extend('a'..='e');
2016        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2017        assert_eq!(result, vec!['b', 'c', 'd']);
2018
2019        // Test with floats
2020        let mut float_data = Unsorted::new();
2021        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2022        let result = float_data
2023            .custom_percentiles(&[10, 30, 50, 70, 90])
2024            .unwrap();
2025        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2026
2027        // Test with empty percentiles array
2028        let result = float_data.custom_percentiles(&[]).unwrap();
2029        assert_eq!(result, Vec::<f64>::new());
2030
2031        // Test with duplicate percentiles
2032        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2033        assert_eq!(result, vec![5.5]);
2034
2035        // Test with extreme percentiles
2036        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2037        assert_eq!(result, vec![1.1, 9.9]);
2038
2039        // Test with unsorted percentiles
2040        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2041        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2042
2043        // Test with single element
2044        let mut single = Unsorted::new();
2045        single.add(42);
2046        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2047        assert_eq!(result, vec![42, 42, 42]);
2048    }
2049
2050    #[test]
2051    fn quartiles_stream() {
2052        assert_eq!(
2053            quartiles(vec![3usize, 5, 7].into_iter()),
2054            Some((3., 5., 7.))
2055        );
2056        assert_eq!(
2057            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2058            Some((4., 6., 8.))
2059        );
2060        assert_eq!(
2061            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2062            Some((1.5, 4.5, 9.))
2063        );
2064        assert_eq!(
2065            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2066            Some((4., 7., 10.5))
2067        );
2068        assert_eq!(
2069            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2070            Some((2., 3., 9.))
2071        );
2072        assert_eq!(
2073            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2074            Some((5., 8., 12.))
2075        );
2076        assert_eq!(
2077            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2078            Some((2., 6., 10.))
2079        );
2080        assert_eq!(
2081            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2082            Some((5., 9., 20.))
2083        );
2084        assert_eq!(
2085            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2086            Some((5., 6., 10.))
2087        );
2088    }
2089
2090    #[test]
2091    fn quartiles_floats() {
2092        assert_eq!(
2093            quartiles(vec![3_f64, 5., 7.].into_iter()),
2094            Some((3., 5., 7.))
2095        );
2096        assert_eq!(
2097            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2098            Some((4., 6., 8.))
2099        );
2100        assert_eq!(
2101            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2102            Some((4., 7., 10.5))
2103        );
2104        assert_eq!(
2105            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2106            Some((5., 8., 12.))
2107        );
2108        assert_eq!(
2109            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2110            Some((5., 9., 20.))
2111        );
2112    }
2113
2114    #[test]
2115    fn test_quartiles_zero_copy_small() {
2116        // Test edge cases for zero-copy quartiles
2117        let unsorted: Unsorted<i32> = Unsorted::new();
2118        assert_eq!(unsorted.quartiles_zero_copy(), None);
2119
2120        let mut unsorted = Unsorted::new();
2121        unsorted.extend(vec![1, 2]);
2122        assert_eq!(unsorted.quartiles_zero_copy(), None);
2123
2124        let mut unsorted = Unsorted::new();
2125        unsorted.extend(vec![1, 2, 3]);
2126        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2127
2128        // Test larger case
2129        let mut unsorted = Unsorted::new();
2130        unsorted.extend(vec![3, 5, 7, 9]);
2131        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2132    }
2133
2134    #[test]
2135    fn gini_empty() {
2136        let mut unsorted: Unsorted<i32> = Unsorted::new();
2137        assert_eq!(unsorted.gini(None), None);
2138        let empty_vec: Vec<i32> = vec![];
2139        assert_eq!(gini(empty_vec.into_iter(), None), None);
2140    }
2141
2142    #[test]
2143    fn gini_single_element() {
2144        let mut unsorted = Unsorted::new();
2145        unsorted.add(5);
2146        assert_eq!(unsorted.gini(None), Some(0.0));
2147        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2148    }
2149
2150    #[test]
2151    fn gini_perfect_equality() {
2152        // All values are the same - perfect equality, Gini = 0
2153        let mut unsorted = Unsorted::new();
2154        unsorted.extend(vec![10, 10, 10, 10, 10]);
2155        let result = unsorted.gini(None).unwrap();
2156        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2157
2158        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2159    }
2160
2161    #[test]
2162    fn gini_perfect_inequality() {
2163        // One value has everything, others have zero - perfect inequality
2164        // For [0, 0, 0, 0, 100], Gini should be close to 1
2165        let mut unsorted = Unsorted::new();
2166        unsorted.extend(vec![0, 0, 0, 0, 100]);
2167        let result = unsorted.gini(None).unwrap();
2168        // Perfect inequality should give Gini close to 1
2169        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2170        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2171    }
2172
2173    #[test]
2174    fn gini_stream() {
2175        // Test with known values
2176        // For [1, 2, 3, 4, 5]:
2177        // sum = 15
2178        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2179        // n = 5
2180        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2181        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2182        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2183        assert!(
2184            (result - expected).abs() < 1e-10,
2185            "Expected {}, got {}",
2186            expected,
2187            result
2188        );
2189    }
2190
2191    #[test]
2192    fn gini_floats() {
2193        let mut unsorted = Unsorted::new();
2194        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2195        let result = unsorted.gini(None).unwrap();
2196        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2197        assert!((result - expected).abs() < 1e-10);
2198
2199        assert!(
2200            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2201                < 1e-10
2202        );
2203    }
2204
2205    #[test]
2206    fn gini_all_zeros() {
2207        // All zeros - sum is zero, Gini is undefined
2208        let mut unsorted = Unsorted::new();
2209        unsorted.extend(vec![0, 0, 0, 0]);
2210        assert_eq!(unsorted.gini(None), None);
2211        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2212    }
2213
2214    #[test]
2215    fn gini_negative_values() {
2216        // Test with negative values (mathematically valid)
2217        let mut unsorted = Unsorted::new();
2218        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2219        let result = unsorted.gini(None);
2220        // Sum is 0, so Gini is undefined
2221        assert_eq!(result, None);
2222
2223        // Test with negative values that don't sum to zero
2224        let mut unsorted = Unsorted::new();
2225        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2226        let result = unsorted.gini(None);
2227        // Sum is 0, so Gini is undefined
2228        assert_eq!(result, None);
2229
2230        // Test with values containing negatives that sum to non-zero
2231        // Gini is undefined for negative values, should return None
2232        let mut unsorted = Unsorted::new();
2233        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2234        let result = unsorted.gini(None);
2235        assert_eq!(result, None);
2236    }
2237
2238    #[test]
2239    fn gini_known_cases() {
2240        // Test case: [1, 1, 1, 1, 1] - perfect equality
2241        let mut unsorted = Unsorted::new();
2242        unsorted.extend(vec![1, 1, 1, 1, 1]);
2243        let result = unsorted.gini(None).unwrap();
2244        assert!((result - 0.0).abs() < 1e-10);
2245
2246        // Test case: [0, 0, 0, 0, 1] - high inequality
2247        let mut unsorted = Unsorted::new();
2248        unsorted.extend(vec![0, 0, 0, 0, 1]);
2249        let result = unsorted.gini(None).unwrap();
2250        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2251        assert!((result - 0.8).abs() < 1e-10);
2252
2253        // Test case: [1, 2, 3] - moderate inequality
2254        let mut unsorted = Unsorted::new();
2255        unsorted.extend(vec![1, 2, 3]);
2256        let result = unsorted.gini(None).unwrap();
2257        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2258        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2259        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2260        assert!((result - expected).abs() < 1e-10);
2261    }
2262
2263    #[test]
2264    fn gini_precalc_sum() {
2265        // Test with pre-calculated sum
2266        let mut unsorted = Unsorted::new();
2267        unsorted.extend(vec![1, 2, 3, 4, 5]);
2268        let precalc_sum = Some(15.0);
2269        let result = unsorted.gini(precalc_sum).unwrap();
2270        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2271        assert!((result - expected).abs() < 1e-10);
2272
2273        // Test that pre-calculated sum gives same result
2274        let mut unsorted2 = Unsorted::new();
2275        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2276        let result2 = unsorted2.gini(None).unwrap();
2277        assert!((result - result2).abs() < 1e-10);
2278    }
2279
2280    #[test]
2281    fn gini_large_dataset() {
2282        // Test with larger dataset to exercise parallel path
2283        let data: Vec<i32> = (1..=1000).collect();
2284        let result = gini(data.iter().copied(), None);
2285        assert!(result.is_some());
2286        let gini_val = result.unwrap();
2287        // For uniform distribution, Gini should be positive but not too high
2288        assert!(gini_val > 0.0 && gini_val < 0.5);
2289    }
2290
2291    #[test]
2292    fn gini_unsorted_vs_sorted() {
2293        // Test that sorting doesn't affect result
2294        let mut unsorted1 = Unsorted::new();
2295        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2296        let result1 = unsorted1.gini(None).unwrap();
2297
2298        let mut unsorted2 = Unsorted::new();
2299        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2300        let result2 = unsorted2.gini(None).unwrap();
2301
2302        assert!((result1 - result2).abs() < 1e-10);
2303    }
2304
2305    #[test]
2306    fn gini_small_values() {
2307        // Test with very small values
2308        let mut unsorted = Unsorted::new();
2309        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2310        let result = unsorted.gini(None);
2311        assert!(result.is_some());
2312        // Should be same as [1, 2, 3, 4, 5] scaled down
2313        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2314        assert!((result.unwrap() - expected).abs() < 1e-10);
2315    }
2316
2317    #[test]
2318    fn gini_large_values() {
2319        // Test with large values
2320        let mut unsorted = Unsorted::new();
2321        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2322        let result = unsorted.gini(None);
2323        assert!(result.is_some());
2324        // Should be same as [1, 2, 3, 4, 5] scaled up
2325        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2326        assert!((result.unwrap() - expected).abs() < 1e-10);
2327    }
2328
2329    #[test]
2330    fn gini_two_elements() {
2331        // Test with exactly 2 elements
2332        let mut unsorted = Unsorted::new();
2333        unsorted.extend(vec![1, 2]);
2334        let result = unsorted.gini(None).unwrap();
2335        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2336        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2337        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2338        assert!((result - expected).abs() < 1e-10);
2339    }
2340
2341    #[test]
2342    fn gini_precalc_sum_zero() {
2343        // Test with pre-calculated sum of zero (should return None)
2344        let mut unsorted = Unsorted::new();
2345        unsorted.extend(vec![1, 2, 3, 4, 5]);
2346        let result = unsorted.gini(Some(0.0));
2347        assert_eq!(result, None);
2348    }
2349
2350    #[test]
2351    fn gini_precalc_sum_negative() {
2352        // Gini is undefined for negative values, should return None
2353        let mut unsorted = Unsorted::new();
2354        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2355        let result = unsorted.gini(None);
2356        assert_eq!(result, None);
2357
2358        // Negative precalculated sum should also return None
2359        let mut unsorted = Unsorted::new();
2360        unsorted.extend(vec![1, 2, 3]);
2361        let result = unsorted.gini(Some(-5.0));
2362        assert_eq!(result, None);
2363    }
2364
2365    #[test]
2366    fn gini_different_types() {
2367        // Test with different integer types
2368        let mut unsorted_u32 = Unsorted::new();
2369        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2370        let result_u32 = unsorted_u32.gini(None).unwrap();
2371
2372        let mut unsorted_i64 = Unsorted::new();
2373        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2374        let result_i64 = unsorted_i64.gini(None).unwrap();
2375
2376        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2377        assert!((result_u32 - expected).abs() < 1e-10);
2378        assert!((result_i64 - expected).abs() < 1e-10);
2379    }
2380
2381    #[test]
2382    fn gini_extreme_inequality() {
2383        // Test with extreme inequality: one very large value, many zeros
2384        let mut unsorted = Unsorted::new();
2385        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2386        let result = unsorted.gini(None).unwrap();
2387        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2388        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2389        assert!((result - 0.9).abs() < 1e-10);
2390    }
2391
2392    #[test]
2393    fn gini_duplicate_values() {
2394        // Test with many duplicate values
2395        let mut unsorted = Unsorted::new();
2396        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2397        let result = unsorted.gini(None);
2398        assert!(result.is_some());
2399        // Should be between 0 and 1
2400        let gini_val = result.unwrap();
2401        assert!((0.0..=1.0).contains(&gini_val));
2402    }
2403
2404    #[test]
2405    fn kurtosis_empty() {
2406        let mut unsorted: Unsorted<i32> = Unsorted::new();
2407        assert_eq!(unsorted.kurtosis(None, None), None);
2408        let empty_vec: Vec<i32> = vec![];
2409        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2410    }
2411
2412    #[test]
2413    fn kurtosis_small() {
2414        // Need at least 4 elements
2415        let mut unsorted = Unsorted::new();
2416        unsorted.extend(vec![1, 2]);
2417        assert_eq!(unsorted.kurtosis(None, None), None);
2418
2419        let mut unsorted = Unsorted::new();
2420        unsorted.extend(vec![1, 2, 3]);
2421        assert_eq!(unsorted.kurtosis(None, None), None);
2422    }
2423
2424    #[test]
2425    fn kurtosis_normal_distribution() {
2426        // Normal distribution should have kurtosis close to 0
2427        let mut unsorted = Unsorted::new();
2428        unsorted.extend(vec![1, 2, 3, 4, 5]);
2429        let result = unsorted.kurtosis(None, None);
2430        assert!(result.is_some());
2431        // For small samples, kurtosis can vary significantly
2432    }
2433
2434    #[test]
2435    fn kurtosis_all_same() {
2436        // All same values - variance is 0, kurtosis undefined
2437        let mut unsorted = Unsorted::new();
2438        unsorted.extend(vec![5, 5, 5, 5]);
2439        assert_eq!(unsorted.kurtosis(None, None), None);
2440    }
2441
2442    #[test]
2443    fn kurtosis_stream() {
2444        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2445        assert!(result.is_some());
2446    }
2447
2448    #[test]
2449    fn kurtosis_precalc_mean_variance() {
2450        // Test with pre-calculated mean and variance
2451        let mut unsorted = Unsorted::new();
2452        unsorted.extend(vec![1, 2, 3, 4, 5]);
2453
2454        // Calculate mean and variance manually
2455        let mean = 3.0f64;
2456        let variance = ((1.0f64 - 3.0).powi(2)
2457            + (2.0f64 - 3.0).powi(2)
2458            + (3.0f64 - 3.0).powi(2)
2459            + (4.0f64 - 3.0).powi(2)
2460            + (5.0f64 - 3.0).powi(2))
2461            / 5.0;
2462
2463        let result = unsorted.kurtosis(Some(mean), Some(variance));
2464        assert!(result.is_some());
2465
2466        // Test that pre-calculated values give same result
2467        let mut unsorted2 = Unsorted::new();
2468        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2469        let result2 = unsorted2.kurtosis(None, None);
2470        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2471    }
2472
2473    #[test]
2474    fn kurtosis_precalc_mean_only() {
2475        // Test with pre-calculated mean only
2476        let mut unsorted = Unsorted::new();
2477        unsorted.extend(vec![1, 2, 3, 4, 5]);
2478        let mean = 3.0f64;
2479
2480        let result = unsorted.kurtosis(Some(mean), None);
2481        assert!(result.is_some());
2482
2483        // Test that pre-calculated mean gives same result
2484        let mut unsorted2 = Unsorted::new();
2485        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2486        let result2 = unsorted2.kurtosis(None, None);
2487        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2488    }
2489
2490    #[test]
2491    fn kurtosis_precalc_variance_only() {
2492        // Test with pre-calculated variance only
2493        let mut unsorted = Unsorted::new();
2494        unsorted.extend(vec![1, 2, 3, 4, 5]);
2495        let variance = ((1.0f64 - 3.0).powi(2)
2496            + (2.0f64 - 3.0).powi(2)
2497            + (3.0f64 - 3.0).powi(2)
2498            + (4.0f64 - 3.0).powi(2)
2499            + (5.0f64 - 3.0).powi(2))
2500            / 5.0;
2501
2502        let result = unsorted.kurtosis(None, Some(variance));
2503        assert!(result.is_some());
2504
2505        // Test that pre-calculated variance gives same result
2506        let mut unsorted2 = Unsorted::new();
2507        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2508        let result2 = unsorted2.kurtosis(None, None);
2509        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2510    }
2511
2512    #[test]
2513    fn kurtosis_exact_calculation() {
2514        // Test with exact calculation for [1, 2, 3, 4]
2515        // Mean = 2.5
2516        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2517        // Variance^2 = 1.5625
2518        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2519        // n = 4
2520        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2521        let mut unsorted = Unsorted::new();
2522        unsorted.extend(vec![1, 2, 3, 4]);
2523        let result = unsorted.kurtosis(None, None).unwrap();
2524        // For small samples, kurtosis can be very high
2525        assert!(result.is_finite());
2526    }
2527
2528    #[test]
2529    fn kurtosis_uniform_distribution() {
2530        // Uniform distribution should have negative excess kurtosis
2531        let mut unsorted = Unsorted::new();
2532        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2533        let result = unsorted.kurtosis(None, None).unwrap();
2534        // Uniform distribution has excess kurtosis = -1.2
2535        // But for small samples, it can vary significantly
2536        assert!(result.is_finite());
2537    }
2538
2539    #[test]
2540    fn kurtosis_large_dataset() {
2541        // Test with larger dataset to exercise parallel path
2542        let data: Vec<i32> = (1..=1000).collect();
2543        let result = kurtosis(data.iter().copied(), None, None);
2544        assert!(result.is_some());
2545        let kurt_val = result.unwrap();
2546        assert!(kurt_val.is_finite());
2547    }
2548
2549    #[test]
2550    fn kurtosis_unsorted_vs_sorted() {
2551        // Test that sorting doesn't affect result
2552        let mut unsorted1 = Unsorted::new();
2553        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2554        let result1 = unsorted1.kurtosis(None, None).unwrap();
2555
2556        let mut unsorted2 = Unsorted::new();
2557        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2558        let result2 = unsorted2.kurtosis(None, None).unwrap();
2559
2560        assert!((result1 - result2).abs() < 1e-10);
2561    }
2562
2563    #[test]
2564    fn kurtosis_minimum_size() {
2565        // Test with exactly 4 elements (minimum required)
2566        let mut unsorted = Unsorted::new();
2567        unsorted.extend(vec![1, 2, 3, 4]);
2568        let result = unsorted.kurtosis(None, None);
2569        assert!(result.is_some());
2570        assert!(result.unwrap().is_finite());
2571    }
2572
2573    #[test]
2574    fn kurtosis_heavy_tailed() {
2575        // Test with heavy-tailed distribution (outliers)
2576        let mut unsorted = Unsorted::new();
2577        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2578        let result = unsorted.kurtosis(None, None).unwrap();
2579        // Heavy tails should give positive excess kurtosis
2580        assert!(result.is_finite());
2581        // With an outlier, kurtosis should be positive
2582        assert!(result > -10.0); // Allow some variance but should be reasonable
2583    }
2584
2585    #[test]
2586    fn kurtosis_light_tailed() {
2587        // Test with light-tailed distribution (values close together)
2588        let mut unsorted = Unsorted::new();
2589        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2590        let result = unsorted.kurtosis(None, None).unwrap();
2591        // Light tails might give negative excess kurtosis
2592        assert!(result.is_finite());
2593    }
2594
2595    #[test]
2596    fn kurtosis_small_variance() {
2597        // Test with very small variance (values very close together)
2598        let mut unsorted = Unsorted::new();
2599        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2600        let result = unsorted.kurtosis(None, None);
2601        // Should still compute (variance is very small but non-zero)
2602        assert!(result.is_some());
2603        assert!(result.unwrap().is_finite());
2604    }
2605
2606    #[test]
2607    fn kurtosis_precalc_zero_variance() {
2608        // Test with pre-calculated variance of zero (should return None)
2609        let mut unsorted = Unsorted::new();
2610        unsorted.extend(vec![1, 2, 3, 4, 5]);
2611        let result = unsorted.kurtosis(None, Some(0.0));
2612        assert_eq!(result, None);
2613    }
2614
2615    #[test]
2616    fn kurtosis_precalc_negative_variance() {
2617        // Test with negative variance (invalid, but should handle gracefully)
2618        let mut unsorted = Unsorted::new();
2619        unsorted.extend(vec![1, 2, 3, 4, 5]);
2620        // Negative variance is invalid, but function should handle it
2621        let result = unsorted.kurtosis(None, Some(-1.0));
2622        // Should either return None or handle it gracefully
2623        // The function computes variance_sq = variance^2, so negative becomes positive
2624        // But this is invalid input, so behavior may vary
2625        // For now, just check it doesn't panic
2626        let _ = result;
2627    }
2628
2629    #[test]
2630    fn kurtosis_different_types() {
2631        // Test with different integer types
2632        let mut unsorted_u32 = Unsorted::new();
2633        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2634        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2635
2636        let mut unsorted_i64 = Unsorted::new();
2637        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2638        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2639
2640        assert!((result_u32 - result_i64).abs() < 1e-10);
2641    }
2642
2643    #[test]
2644    fn kurtosis_floating_point_precision() {
2645        // Test floating point precision
2646        let mut unsorted = Unsorted::new();
2647        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2648        let result = unsorted.kurtosis(None, None);
2649        assert!(result.is_some());
2650        assert!(result.unwrap().is_finite());
2651    }
2652
2653    #[test]
2654    fn kurtosis_negative_values() {
2655        // Test with negative values
2656        let mut unsorted = Unsorted::new();
2657        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2658        let result = unsorted.kurtosis(None, None);
2659        assert!(result.is_some());
2660        assert!(result.unwrap().is_finite());
2661    }
2662
2663    #[test]
2664    fn kurtosis_mixed_positive_negative() {
2665        // Test with mixed positive and negative values
2666        let mut unsorted = Unsorted::new();
2667        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2668        let result = unsorted.kurtosis(None, None);
2669        assert!(result.is_some());
2670        assert!(result.unwrap().is_finite());
2671    }
2672
2673    #[test]
2674    fn kurtosis_duplicate_values() {
2675        // Test with duplicate values (but not all same)
2676        let mut unsorted = Unsorted::new();
2677        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2678        let result = unsorted.kurtosis(None, None);
2679        assert!(result.is_some());
2680        assert!(result.unwrap().is_finite());
2681    }
2682
2683    #[test]
2684    fn kurtosis_precalc_mean_wrong() {
2685        // Test that wrong pre-calculated mean gives wrong result
2686        let mut unsorted1 = Unsorted::new();
2687        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2688        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2689
2690        let mut unsorted2 = Unsorted::new();
2691        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2692        let wrong_mean = 10.0; // Wrong mean
2693        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2694
2695        // Results should be different
2696        assert!((correct_result - wrong_result).abs() > 1e-5);
2697    }
2698
2699    #[test]
2700    fn percentile_rank_empty() {
2701        let mut unsorted: Unsorted<i32> = Unsorted::new();
2702        assert_eq!(unsorted.percentile_rank(5), None);
2703        let empty_vec: Vec<i32> = vec![];
2704        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2705    }
2706
2707    #[test]
2708    fn percentile_rank_basic() {
2709        let mut unsorted = Unsorted::new();
2710        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2711
2712        // Value less than all
2713        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2714
2715        // Value greater than all
2716        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2717
2718        // Median (5) should be around 50th percentile
2719        let rank = unsorted.percentile_rank(5).unwrap();
2720        assert!((rank - 50.0).abs() < 1.0);
2721
2722        // First value should be at 10th percentile
2723        let rank = unsorted.percentile_rank(1).unwrap();
2724        assert!((rank - 10.0).abs() < 1.0);
2725    }
2726
2727    #[test]
2728    fn percentile_rank_duplicates() {
2729        let mut unsorted = Unsorted::new();
2730        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2731
2732        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2733        let rank = unsorted.percentile_rank(2).unwrap();
2734        assert!((rank - 40.0).abs() < 1.0);
2735    }
2736
2737    #[test]
2738    fn percentile_rank_stream() {
2739        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2740        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2741    }
2742
2743    #[test]
2744    fn percentile_rank_many_ties() {
2745        // 100 copies of 5 followed by 100 copies of 10 — tests O(log n) upper bound
2746        let mut unsorted = Unsorted::new();
2747        for _ in 0..100 {
2748            unsorted.add(5u32);
2749        }
2750        for _ in 0..100 {
2751            unsorted.add(10u32);
2752        }
2753        // 100 values <= 5 out of 200
2754        let rank = unsorted.percentile_rank(5).unwrap();
2755        assert!((rank - 50.0).abs() < f64::EPSILON);
2756        // All 200 values <= 10
2757        let mut unsorted2 = Unsorted::new();
2758        for _ in 0..100 {
2759            unsorted2.add(5u32);
2760        }
2761        for _ in 0..100 {
2762            unsorted2.add(10u32);
2763        }
2764        let rank = unsorted2.percentile_rank(10).unwrap();
2765        assert!((rank - 100.0).abs() < f64::EPSILON);
2766    }
2767
2768    #[test]
2769    fn atkinson_empty() {
2770        let mut unsorted: Unsorted<i32> = Unsorted::new();
2771        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2772        let empty_vec: Vec<i32> = vec![];
2773        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2774    }
2775
2776    #[test]
2777    fn atkinson_single_element() {
2778        let mut unsorted = Unsorted::new();
2779        unsorted.add(5);
2780        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2781        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2782    }
2783
2784    #[test]
2785    fn atkinson_perfect_equality() {
2786        // All values the same - perfect equality, Atkinson = 0
2787        let mut unsorted = Unsorted::new();
2788        unsorted.extend(vec![10, 10, 10, 10, 10]);
2789        let result = unsorted.atkinson(1.0, None, None).unwrap();
2790        assert!((result - 0.0).abs() < 1e-10);
2791    }
2792
2793    #[test]
2794    fn atkinson_epsilon_zero() {
2795        // Epsilon = 0 means no inequality aversion, should return 0
2796        let mut unsorted = Unsorted::new();
2797        unsorted.extend(vec![1, 2, 3, 4, 5]);
2798        let result = unsorted.atkinson(0.0, None, None).unwrap();
2799        assert!((result - 0.0).abs() < 1e-10);
2800    }
2801
2802    #[test]
2803    fn atkinson_epsilon_one() {
2804        // Epsilon = 1 uses geometric mean
2805        let mut unsorted = Unsorted::new();
2806        unsorted.extend(vec![1, 2, 3, 4, 5]);
2807        let result = unsorted.atkinson(1.0, None, None);
2808        assert!(result.is_some());
2809    }
2810
2811    #[test]
2812    fn atkinson_negative_epsilon() {
2813        let mut unsorted = Unsorted::new();
2814        unsorted.extend(vec![1, 2, 3, 4, 5]);
2815        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2816    }
2817
2818    #[test]
2819    fn atkinson_zero_mean() {
2820        // If mean is zero, Atkinson is undefined
2821        let mut unsorted = Unsorted::new();
2822        unsorted.extend(vec![0, 0, 0, 0]);
2823        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2824    }
2825
2826    #[test]
2827    fn atkinson_stream() {
2828        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2829        assert!(result.is_some());
2830    }
2831
2832    #[test]
2833    fn atkinson_precalc_mean_geometric_sum() {
2834        // Test with pre-calculated mean and geometric_sum
2835        let mut unsorted = Unsorted::new();
2836        unsorted.extend(vec![1, 2, 3, 4, 5]);
2837
2838        // Calculate mean and geometric_sum manually
2839        let mean = 3.0f64;
2840        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2841
2842        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2843        assert!(result.is_some());
2844
2845        // Test that pre-calculated values give same result
2846        let mut unsorted2 = Unsorted::new();
2847        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2848        let result2 = unsorted2.atkinson(1.0, None, None);
2849        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2850    }
2851
2852    #[test]
2853    fn atkinson_precalc_mean_only() {
2854        // Test with pre-calculated mean only
2855        let mut unsorted = Unsorted::new();
2856        unsorted.extend(vec![1, 2, 3, 4, 5]);
2857        let mean = 3.0f64;
2858
2859        let result = unsorted.atkinson(1.0, Some(mean), None);
2860        assert!(result.is_some());
2861
2862        // Test that pre-calculated mean gives same result
2863        let mut unsorted2 = Unsorted::new();
2864        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2865        let result2 = unsorted2.atkinson(1.0, None, None);
2866        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2867    }
2868
2869    #[test]
2870    fn atkinson_precalc_geometric_sum_only() {
2871        // Test with pre-calculated geometric_sum only
2872        let mut unsorted = Unsorted::new();
2873        unsorted.extend(vec![1, 2, 3, 4, 5]);
2874        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2875
2876        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2877        assert!(result.is_some());
2878
2879        // Test that pre-calculated geometric_sum gives same result
2880        let mut unsorted2 = Unsorted::new();
2881        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2882        let result2 = unsorted2.atkinson(1.0, None, None);
2883        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2884    }
2885
2886    #[test]
2887    fn test_median_with_infinity() {
2888        let mut unsorted = Unsorted::new();
2889        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2890        assert_eq!(unsorted.median(), Some(2.0));
2891    }
2892
2893    #[test]
2894    fn test_median_with_neg_infinity() {
2895        let mut unsorted = Unsorted::new();
2896        unsorted.extend(vec![f64::NEG_INFINITY, 1.0f64, 2.0]);
2897        assert_eq!(unsorted.median(), Some(1.0));
2898    }
2899
2900    #[test]
2901    fn test_quartiles_with_infinity() {
2902        let mut unsorted = Unsorted::new();
2903        unsorted.extend(vec![f64::NEG_INFINITY, 1.0, 2.0, 3.0, f64::INFINITY]);
2904        let q = unsorted.quartiles();
2905        // Q2 (median) should be 2.0
2906        assert!(q.is_some());
2907        let (_, q2, _) = q.unwrap();
2908        assert_eq!(q2, 2.0);
2909    }
2910
2911    #[test]
2912    fn test_mode_with_nan() {
2913        // NaN breaks the Ord contract via Partial<T>, so sort order is
2914        // non-deterministic. We only verify the call doesn't panic —
2915        // the exact mode value depends on where NaN lands after sorting.
2916        let mut unsorted: Unsorted<f64> = Unsorted::new();
2917        unsorted.extend(vec![1.0, f64::NAN, 2.0, 2.0, 3.0]);
2918        let _result = unsorted.mode(); // must not panic
2919    }
2920
2921    #[test]
2922    fn test_gini_with_infinity() {
2923        let mut unsorted = Unsorted::new();
2924        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2925        let g = unsorted.gini(None);
2926        // Gini with infinity in the data: the weighted_sum/sum ratio involves
2927        // Inf/Inf which is NaN, so the result is Some(NaN) — not a meaningful
2928        // Gini coefficient, but importantly does not panic
2929        assert!(g.unwrap().is_nan());
2930    }
2931
2932    #[test]
2933    fn test_cardinality_with_infinity() {
2934        let mut unsorted = Unsorted::new();
2935        unsorted.extend(vec![1.0f64, f64::INFINITY, f64::NEG_INFINITY, 1.0]);
2936        assert_eq!(unsorted.cardinality(false, 10_000), 3);
2937    }
2938}
2939
2940#[cfg(test)]
2941mod bench {
2942    use super::*;
2943    use std::time::Instant;
2944
2945    #[test]
2946    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
2947    fn comprehensive_quartiles_benchmark() {
2948        // Test a wide range of data sizes
2949        let data_sizes = vec![
2950            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2951        ];
2952
2953        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2954
2955        for size in data_sizes {
2956            println!("--- Testing with {} elements ---", size);
2957
2958            // Test different data patterns
2959            let test_patterns = vec![
2960                ("Random", generate_random_data(size)),
2961                ("Reverse Sorted", {
2962                    let mut v = Vec::with_capacity(size);
2963                    for x in (0..size).rev() {
2964                        v.push(x as i32);
2965                    }
2966                    v
2967                }),
2968                ("Already Sorted", {
2969                    let mut v = Vec::with_capacity(size);
2970                    for x in 0..size {
2971                        v.push(x as i32);
2972                    }
2973                    v
2974                }),
2975                ("Many Duplicates", {
2976                    // Create a vector with just a few distinct values repeated many times
2977                    let mut v = Vec::with_capacity(size);
2978                    let chunk_size = size / 100;
2979                    for i in 0..100 {
2980                        v.extend(std::iter::repeat_n(i, chunk_size));
2981                    }
2982                    // Add any remaining elements
2983                    v.extend(std::iter::repeat_n(0, size - v.len()));
2984                    v
2985                }),
2986            ];
2987
2988            for (pattern_name, test_data) in test_patterns {
2989                println!("\n  Pattern: {}", pattern_name);
2990
2991                // Benchmark sorting-based approach
2992                let mut unsorted1 = Unsorted::new();
2993                unsorted1.extend(test_data.clone());
2994
2995                let start = Instant::now();
2996                let result_sorted = unsorted1.quartiles();
2997                let sorted_time = start.elapsed();
2998
2999                // Benchmark selection-based approach (with copying)
3000                let mut unsorted2 = Unsorted::new();
3001                unsorted2.extend(test_data.clone());
3002
3003                let start = Instant::now();
3004                let result_selection = unsorted2.quartiles_with_selection();
3005                let selection_time = start.elapsed();
3006
3007                // Benchmark zero-copy selection-based approach
3008                let mut unsorted3 = Unsorted::new();
3009                unsorted3.extend(test_data);
3010
3011                let start = Instant::now();
3012                let result_zero_copy = unsorted3.quartiles_zero_copy();
3013                let zero_copy_time = start.elapsed();
3014
3015                // Verify results are the same
3016                assert_eq!(result_sorted, result_selection);
3017                assert_eq!(result_sorted, result_zero_copy);
3018
3019                let selection_speedup =
3020                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3021                let zero_copy_speedup =
3022                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3023
3024                println!("    Sorting:       {:>12?}", sorted_time);
3025                println!(
3026                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3027                    selection_time, selection_speedup
3028                );
3029                println!(
3030                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3031                    zero_copy_time, zero_copy_speedup
3032                );
3033
3034                let best_algorithm =
3035                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3036                        "ZERO-COPY"
3037                    } else if selection_speedup > 1.0 {
3038                        "SELECTION"
3039                    } else {
3040                        "SORTING"
3041                    };
3042                println!("    Best: {}", best_algorithm);
3043            }
3044
3045            println!(); // Add blank line between sizes
3046        }
3047    }
3048
3049    // Generate random data for benchmarking
3050    fn generate_random_data(size: usize) -> Vec<i32> {
3051        // Simple LCG random number generator for reproducible results
3052        let mut rng = 1234567u64;
3053        let mut vec = Vec::with_capacity(size);
3054        for _ in 0..size {
3055            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3056            vec.push((rng >> 16) as i32);
3057        }
3058        vec
3059    }
3060
3061    #[test]
3062    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3063    fn find_selection_threshold() {
3064        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3065
3066        // Binary search approach to find the threshold
3067        let mut found_threshold = None;
3068        let test_sizes = vec![
3069            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3070            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3071        ];
3072
3073        for size in test_sizes {
3074            println!("Testing size: {}", size);
3075
3076            // Use random data as it's most representative of real-world scenarios
3077            let test_data = generate_random_data(size);
3078
3079            // Run multiple iterations to get average performance
3080            let iterations = 3;
3081            let mut sorting_total = 0u128;
3082            let mut selection_total = 0u128;
3083            let mut zero_copy_total = 0u128;
3084
3085            for i in 0..iterations {
3086                println!("  Iteration {}/{}", i + 1, iterations);
3087
3088                // Sorting approach
3089                let mut unsorted1 = Unsorted::new();
3090                unsorted1.extend(test_data.clone());
3091
3092                let start = Instant::now();
3093                let _result_sorted = unsorted1.quartiles();
3094                sorting_total += start.elapsed().as_nanos();
3095
3096                // Selection approach (with copying)
3097                let mut unsorted2 = Unsorted::new();
3098                unsorted2.extend(test_data.clone());
3099
3100                let start = Instant::now();
3101                let _result_selection = unsorted2.quartiles_with_selection();
3102                selection_total += start.elapsed().as_nanos();
3103
3104                // Zero-copy selection approach
3105                let mut unsorted3 = Unsorted::new();
3106                unsorted3.extend(test_data.clone());
3107
3108                let start = Instant::now();
3109                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3110                zero_copy_total += start.elapsed().as_nanos();
3111            }
3112
3113            let avg_sorting = sorting_total / iterations as u128;
3114            let avg_selection = selection_total / iterations as u128;
3115            let avg_zero_copy = zero_copy_total / iterations as u128;
3116            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3117            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3118
3119            println!(
3120                "  Average sorting:    {:>12.2}ms",
3121                avg_sorting as f64 / 1_000_000.0
3122            );
3123            println!(
3124                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3125                avg_selection as f64 / 1_000_000.0,
3126                selection_speedup
3127            );
3128            println!(
3129                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3130                avg_zero_copy as f64 / 1_000_000.0,
3131                zero_copy_speedup
3132            );
3133
3134            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3135                found_threshold = Some(size);
3136                let best_method = if zero_copy_speedup > selection_speedup {
3137                    "Zero-copy"
3138                } else {
3139                    "Selection"
3140                };
3141                println!(
3142                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3143                    best_method, size
3144                );
3145            }
3146
3147            println!();
3148        }
3149
3150        match found_threshold {
3151            Some(threshold) => println!(
3152                "🎯 Selection algorithm becomes faster at approximately {} elements",
3153                threshold
3154            ),
3155            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3156        }
3157    }
3158
3159    #[test]
3160    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3161    fn benchmark_different_data_types() {
3162        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3163
3164        let size = 5_000_000; // Use a large size where differences might be visible
3165
3166        // Test with f64 (floating point)
3167        println!("Testing with f64 data:");
3168        let float_data: Vec<f64> = generate_random_data(size)
3169            .into_iter()
3170            .map(|x| x as f64 / 1000.0)
3171            .collect();
3172
3173        let mut unsorted1 = Unsorted::new();
3174        unsorted1.extend(float_data.clone());
3175        let start = Instant::now();
3176        let _result = unsorted1.quartiles();
3177        let sorting_time = start.elapsed();
3178
3179        let mut unsorted2 = Unsorted::new();
3180        unsorted2.extend(float_data.clone());
3181        let start = Instant::now();
3182        let _result = unsorted2.quartiles_with_selection();
3183        let selection_time = start.elapsed();
3184
3185        let mut unsorted3 = Unsorted::new();
3186        unsorted3.extend(float_data);
3187        let start = Instant::now();
3188        let _result = unsorted3.quartiles_zero_copy();
3189        let zero_copy_time = start.elapsed();
3190
3191        println!("  Sorting:    {:?}", sorting_time);
3192        println!("  Selection:  {:?}", selection_time);
3193        println!("  Zero-copy:  {:?}", zero_copy_time);
3194        println!(
3195            "  Selection Speedup:  {:.2}x",
3196            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3197        );
3198        println!(
3199            "  Zero-copy Speedup:  {:.2}x\n",
3200            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3201        );
3202
3203        // Test with i64 (larger integers)
3204        println!("Testing with i64 data:");
3205        let int64_data: Vec<i64> = generate_random_data(size)
3206            .into_iter()
3207            .map(|x| x as i64 * 1000)
3208            .collect();
3209
3210        let mut unsorted1 = Unsorted::new();
3211        unsorted1.extend(int64_data.clone());
3212        let start = Instant::now();
3213        let _result = unsorted1.quartiles();
3214        let sorting_time = start.elapsed();
3215
3216        let mut unsorted2 = Unsorted::new();
3217        unsorted2.extend(int64_data.clone());
3218        let start = Instant::now();
3219        let _result = unsorted2.quartiles_with_selection();
3220        let selection_time = start.elapsed();
3221
3222        let mut unsorted3 = Unsorted::new();
3223        unsorted3.extend(int64_data);
3224        let start = Instant::now();
3225        let _result = unsorted3.quartiles_zero_copy();
3226        let zero_copy_time = start.elapsed();
3227
3228        println!("  Sorting:    {:?}", sorting_time);
3229        println!("  Selection:  {:?}", selection_time);
3230        println!("  Zero-copy:  {:?}", zero_copy_time);
3231        println!(
3232            "  Selection Speedup:  {:.2}x",
3233            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3234        );
3235        println!(
3236            "  Zero-copy Speedup:  {:.2}x",
3237            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3238        );
3239    }
3240}
stats/unsorted.rs

stats/
unsorted.rs