stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10// PARALLEL_THRESHOLD (10,000) is the minimum dataset size for rayon parallel sort.
11// The separate 10,240 threshold in cardinality estimation (5 × 2,048) is aligned to
12// cache-line-friendly chunk sizes for parallel iterator reduction.
13const PARALLEL_THRESHOLD: usize = 10_000;
14
15/// Compute the exact median on a stream of data.
16///
17/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
18#[inline]
19pub fn median<I>(it: I) -> Option<f64>
20where
21    I: Iterator,
22    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
23{
24    it.collect::<Unsorted<_>>().median()
25}
26
27/// Compute the median absolute deviation (MAD) on a stream of data.
28#[inline]
29pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
30where
31    I: Iterator,
32    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
33{
34    it.collect::<Unsorted<_>>().mad(precalc_median)
35}
36
37/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
38///
39/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
40#[inline]
41pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
42where
43    I: Iterator,
44    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
45{
46    it.collect::<Unsorted<_>>().quartiles()
47}
48
49/// Compute the exact mode on a stream of data.
50///
51/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
52///
53/// If the data does not have a mode, then `None` is returned.
54#[inline]
55pub fn mode<T, I>(it: I) -> Option<T>
56where
57    T: PartialOrd + Clone + Send,
58    I: Iterator<Item = T>,
59{
60    it.collect::<Unsorted<T>>().mode()
61}
62
63/// Compute the modes on a stream of data.
64///
65/// If there is a single mode, then only that value is returned in the `Vec`
66/// however, if there are multiple values tied for occurring the most amount of times
67/// those values are returned.
68///
69/// ## Example
70/// ```
71/// use stats;
72///
73/// let vals = vec![1, 1, 2, 2, 3];
74///
75/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
76/// ```
77/// This has time complexity `O(n)`
78///
79/// If the data does not have a mode, then an empty `Vec` is returned.
80#[inline]
81pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
82where
83    T: PartialOrd + Clone + Send,
84    I: Iterator<Item = T>,
85{
86    it.collect::<Unsorted<T>>().modes()
87}
88
89/// Compute the antimodes on a stream of data.
90///
91/// Antimode is the least frequent non-zero score.
92///
93/// If there is a single antimode, then only that value is returned in the `Vec`
94/// however, if there are multiple values tied for occurring the least amount of times
95/// those values are returned.
96///
97/// Only the first 10 antimodes are returned to prevent returning the whole set
98/// when cardinality = number of records (i.e. all unique values).
99///
100/// ## Example
101/// ```
102/// use stats;
103///
104/// let vals = vec![1, 1, 2, 2, 3];
105///
106/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
107/// ```
108/// This has time complexity `O(n)`
109///
110/// If the data does not have an antimode, then an empty `Vec` is returned.
111#[inline]
112pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
113where
114    T: PartialOrd + Clone + Send,
115    I: Iterator<Item = T>,
116{
117    let (antimodes_result, antimodes_count, antimodes_occurrences) =
118        it.collect::<Unsorted<T>>().antimodes();
119    (antimodes_result, antimodes_count, antimodes_occurrences)
120}
121
122/// Compute the Gini Coefficient on a stream of data.
123///
124/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
125/// to 1 (perfect inequality).
126///
127/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
128#[inline]
129pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
130where
131    I: Iterator,
132    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
133{
134    it.collect::<Unsorted<_>>().gini(precalc_sum)
135}
136
137/// Compute the kurtosis (excess kurtosis) on a stream of data.
138///
139/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
140/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
141/// negative values indicate light tails.
142///
143/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
144#[inline]
145pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
146where
147    I: Iterator,
148    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
149{
150    it.collect::<Unsorted<_>>()
151        .kurtosis(precalc_mean, precalc_variance)
152}
153
154/// Compute the percentile rank of a value on a stream of data.
155///
156/// Returns the percentile rank (0-100) of the given value in the distribution.
157/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
158///
159/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
160#[inline]
161pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
162where
163    I: Iterator,
164    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
165    V: PartialOrd + ToPrimitive,
166{
167    it.collect::<Unsorted<_>>().percentile_rank(value)
168}
169
170/// Compute the Atkinson Index on a stream of data.
171///
172/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
173/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
174/// Higher ε values give more weight to inequality at the lower end of the distribution.
175///
176/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
177#[inline]
178pub fn atkinson<I>(
179    it: I,
180    epsilon: f64,
181    precalc_mean: Option<f64>,
182    precalc_geometric_sum: Option<f64>,
183) -> Option<f64>
184where
185    I: Iterator,
186    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
187{
188    it.collect::<Unsorted<_>>()
189        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
190}
191
192fn median_on_sorted<T>(data: &[T]) -> Option<f64>
193where
194    T: PartialOrd + ToPrimitive,
195{
196    Some(match data.len() {
197        // Empty slice case - return None early
198        0 => {
199            core::hint::cold_path();
200            return None;
201        }
202        // Single element case - return that element converted to f64
203        1 => data.first()?.to_f64()?,
204        // Even length case - average the two middle elements
205        len if len.is_multiple_of(2) => {
206            let idx = len / 2;
207            // Safety: we know these indices are valid because we checked len is even and non-zero,
208            // so idx-1 and idx are valid indices into data
209            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
210            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
211            f64::midpoint(v1, v2)
212        }
213        // Odd length case - return the middle element
214        // Safety: we know the index is within bounds
215        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
216    })
217}
218
219fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
220where
221    T: Sync + PartialOrd + ToPrimitive,
222{
223    if data.is_empty() {
224        core::hint::cold_path();
225        return None;
226    }
227    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
228
229    // Use adaptive parallel processing based on data size
230    let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
231        // Sequential processing for small datasets
232        // Iterator collect enables TrustedLen optimization, eliminating per-element bounds checks
233        data.iter()
234            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
235            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
236            .collect()
237    } else {
238        // Parallel processing for large datasets
239        data.par_iter()
240            // SAFETY: to_f64() always returns Some for standard numeric types
241            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
242            .collect()
243    };
244
245    // Use select_nth_unstable to find the median in O(n) instead of O(n log n) full sort
246    let len = abs_diff_vec.len();
247    let mid = len / 2;
248    let cmp = |a: &f64, b: &f64| a.total_cmp(b);
249
250    abs_diff_vec.select_nth_unstable_by(mid, cmp);
251
252    if len.is_multiple_of(2) {
253        // Even length: need both mid-1 and mid elements
254        let right = abs_diff_vec[mid];
255        // The left partition [0..mid] contains elements <= abs_diff_vec[mid],
256        // so we can find the max of the left partition for mid-1
257        let left = abs_diff_vec[..mid]
258            .iter()
259            .max_by(|a, b| cmp(a, b))
260            .copied()?;
261        Some(f64::midpoint(left, right))
262    } else {
263        Some(abs_diff_vec[mid])
264    }
265}
266
267fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
268where
269    T: Sync + PartialOrd + ToPrimitive,
270{
271    let len = data.len();
272
273    // Early return for empty data
274    if len == 0 {
275        core::hint::cold_path();
276        return None;
277    }
278
279    // Single element case: perfect equality, Gini = 0
280    if len == 1 {
281        core::hint::cold_path();
282        return Some(0.0);
283    }
284
285    // Gini coefficient is only defined for non-negative distributions.
286    // Since data is sorted, check the first (smallest) element.
287    // SAFETY: len > 1 guaranteed by checks above
288    let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
289    if first_val < 0.0 {
290        core::hint::cold_path();
291        return None;
292    }
293
294    // Compute sum and weighted sum.
295    // When precalc_sum is provided, only compute weighted_sum in a single pass.
296    // When not provided, fuse both computations into a single pass over the data
297    // to halve cache pressure (following the fold/reduce pattern used in kurtosis).
298    let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
299        if precalc < 0.0 {
300            core::hint::cold_path();
301            return None;
302        }
303        // Only need weighted_sum — single pass
304        let weighted_sum = if len < PARALLEL_THRESHOLD {
305            let mut weighted_sum = 0.0;
306            for (i, x) in data.iter().enumerate() {
307                // SAFETY: to_f64() always returns Some for standard numeric types
308                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
309                weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
310            }
311            weighted_sum
312        } else {
313            data.par_iter()
314                .enumerate()
315                .map(|(i, x)| {
316                    // SAFETY: to_f64() always returns Some for standard numeric types
317                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
318                    (i + 1) as f64 * val
319                })
320                .sum()
321        };
322        (precalc, weighted_sum)
323    } else if len < PARALLEL_THRESHOLD {
324        // Fused single pass: compute both sum and weighted_sum together
325        let mut sum = 0.0;
326        let mut weighted_sum = 0.0;
327        for (i, x) in data.iter().enumerate() {
328            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
329            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
330            sum += val;
331            weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
332        }
333        (sum, weighted_sum)
334    } else {
335        // Fused parallel single pass using fold/reduce
336        data.par_iter()
337            .enumerate()
338            .fold(
339                || (0.0_f64, 0.0_f64),
340                |acc, (i, x)| {
341                    // SAFETY: to_f64() always returns Some for standard numeric types
342                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
343                    (acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
344                },
345            )
346            .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
347    };
348
349    // If sum is zero, Gini is undefined
350    if sum == 0.0 {
351        core::hint::cold_path();
352        return None;
353    }
354
355    // Compute Gini coefficient using the formula:
356    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
357    // where i is 1-indexed rank and y_i are sorted values
358    let n = len as f64;
359    let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
360
361    Some(gini)
362}
363
364fn kurtosis_on_sorted<T>(
365    data: &[Partial<T>],
366    precalc_mean: Option<f64>,
367    precalc_variance: Option<f64>,
368) -> Option<f64>
369where
370    T: Sync + PartialOrd + ToPrimitive,
371{
372    let len = data.len();
373
374    // Need at least 4 elements for meaningful kurtosis
375    if len < 4 {
376        core::hint::cold_path();
377        return None;
378    }
379
380    // Use pre-calculated mean if provided, otherwise compute it
381    let mean = precalc_mean.unwrap_or_else(|| {
382        let sum: f64 = if len < PARALLEL_THRESHOLD {
383            // Iterator sum enables auto-vectorization (SIMD) by the compiler
384            data.iter()
385                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
386                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
387                .sum()
388        } else {
389            data.par_iter()
390                // SAFETY: to_f64() always returns Some for standard numeric types
391                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
392                .sum()
393        };
394        sum / len as f64
395    });
396
397    // Compute variance_sq and fourth_power_sum
398    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
399    // Otherwise, we need to compute variance from the data
400    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
401        // Negative variance is invalid (possible floating-point rounding artifact)
402        if variance < 0.0 {
403            core::hint::cold_path();
404            return None;
405        }
406        // Use pre-calculated variance: variance_sq = variance^2
407        let variance_sq = variance * variance;
408
409        // Still need to compute fourth_power_sum
410        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
411            let mut sum = 0.0;
412            for x in data {
413                // SAFETY: to_f64() always returns Some for standard numeric types
414                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
415                let diff = val - mean;
416                let diff_sq = diff * diff;
417                sum = diff_sq.mul_add(diff_sq, sum);
418            }
419            sum
420        } else {
421            data.par_iter()
422                .map(|x| {
423                    // SAFETY: to_f64() always returns Some for standard numeric types
424                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
425                    let diff = val - mean;
426                    let diff_sq = diff * diff;
427                    diff_sq * diff_sq
428                })
429                .sum()
430        };
431
432        (variance_sq, fourth_power_sum)
433    } else {
434        // Compute both variance_sum and fourth_power_sum
435        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
436            let mut variance_sum = 0.0;
437            let mut fourth_power_sum = 0.0;
438
439            for x in data {
440                // SAFETY: to_f64() always returns Some for standard numeric types
441                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
442                let diff = val - mean;
443                let diff_sq = diff * diff;
444                variance_sum += diff_sq;
445                fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
446            }
447
448            (variance_sum, fourth_power_sum)
449        } else {
450            // Single pass computing both sums simultaneously
451            data.par_iter()
452                .fold(
453                    || (0.0_f64, 0.0_f64),
454                    |acc, x| {
455                        // SAFETY: to_f64() always returns Some for standard numeric types
456                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
457                        let diff = val - mean;
458                        let diff_sq = diff * diff;
459                        (acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
460                    },
461                )
462                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
463        };
464
465        let variance = variance_sum / len as f64;
466
467        // If variance is zero, all values are the same, kurtosis is undefined
468        if variance == 0.0 {
469            core::hint::cold_path();
470            return None;
471        }
472
473        let variance_sq = variance * variance;
474        (variance_sq, fourth_power_sum)
475    };
476
477    // If variance_sq is zero, all values are the same, kurtosis is undefined
478    if variance_sq == 0.0 {
479        core::hint::cold_path();
480        return None;
481    }
482
483    let n = len as f64;
484
485    // Sample excess kurtosis formula:
486    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
487    let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
488    let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
489    let kurtosis =
490        (n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
491
492    Some(kurtosis)
493}
494
495fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
496where
497    T: PartialOrd + ToPrimitive,
498    V: PartialOrd + ToPrimitive,
499{
500    let len = data.len();
501
502    if len == 0 {
503        core::hint::cold_path();
504        return None;
505    }
506
507    let value_f64 = value.to_f64()?;
508
509    // Binary search to find the position where value would be inserted
510    // This gives us the number of values <= value
511    let count_leq = data.binary_search_by(|x| {
512        x.0.to_f64()
513            .unwrap_or(f64::NAN)
514            .partial_cmp(&value_f64)
515            .unwrap_or(std::cmp::Ordering::Less)
516    });
517
518    let count = match count_leq {
519        Ok(idx) => {
520            // Value found at idx — use partition_point (O(log n)) to find the upper bound
521            // of equal values instead of a linear scan
522            let upper = data[idx + 1..].partition_point(|x| {
523                x.0.to_f64()
524                    .is_some_and(|v| v.total_cmp(&value_f64).is_le())
525            });
526            idx + 1 + upper
527        }
528        Err(idx) => idx, // Number of values less than value
529    };
530
531    // Percentile rank = (count / n) * 100
532    Some((count as f64 / len as f64) * 100.0)
533}
534
535fn atkinson_on_sorted<T>(
536    data: &[Partial<T>],
537    epsilon: f64,
538    precalc_mean: Option<f64>,
539    precalc_geometric_sum: Option<f64>,
540) -> Option<f64>
541where
542    T: Sync + PartialOrd + ToPrimitive,
543{
544    let len = data.len();
545
546    // Early return for empty data
547    if len == 0 {
548        core::hint::cold_path();
549        return None;
550    }
551
552    // Single element case: perfect equality, Atkinson = 0
553    if len == 1 {
554        core::hint::cold_path();
555        return Some(0.0);
556    }
557
558    // Epsilon must be non-negative
559    if epsilon < 0.0 {
560        core::hint::cold_path();
561        return None;
562    }
563
564    let epsilon_is_one = (epsilon - 1.0).abs() < 1e-10;
565
566    // Fused fast path: epsilon=1 with no precalc — compute sum (for mean) and
567    // ln_sum (for geometric_sum) in a single pass over data, halving memory bandwidth.
568    if epsilon_is_one && precalc_mean.is_none() && precalc_geometric_sum.is_none() {
569        // Explicit NaN check plus `v <= 0.0` rejects NaN, zero, negatives, and
570        // -infinity. The non-fused epsilon=1 branch rejects NaN via a post-sum
571        // `is_nan()` check; handling it per-element here is equivalent.
572        let (sum, ln_sum, any_invalid) = if len < PARALLEL_THRESHOLD {
573            let mut s = 0.0f64;
574            let mut ls = 0.0f64;
575            let mut bad = false;
576            for x in data {
577                // SAFETY: to_f64() always returns Some for standard numeric types
578                let v = unsafe { x.0.to_f64().unwrap_unchecked() };
579                if v.is_nan() || v <= 0.0 {
580                    bad = true;
581                } else {
582                    s += v;
583                    ls += v.ln();
584                }
585            }
586            (s, ls, bad)
587        } else {
588            data.par_iter()
589                .fold(
590                    || (0.0f64, 0.0f64, false),
591                    |(s, ls, bad), x| {
592                        // SAFETY: to_f64() always returns Some for standard numeric types
593                        let v = unsafe { x.0.to_f64().unwrap_unchecked() };
594                        if v.is_nan() || v <= 0.0 {
595                            (s, ls, true)
596                        } else {
597                            (s + v, ls + v.ln(), bad)
598                        }
599                    },
600                )
601                .reduce(
602                    || (0.0, 0.0, false),
603                    |a, b| (a.0 + b.0, a.1 + b.1, a.2 || b.2),
604                )
605        };
606        if any_invalid {
607            core::hint::cold_path();
608            return None;
609        }
610        let mean = sum / len as f64;
611        if mean == 0.0 {
612            core::hint::cold_path();
613            return None;
614        }
615        let geometric_mean = (ln_sum / len as f64).exp();
616        return Some(1.0 - geometric_mean / mean);
617    }
618
619    // Use pre-calculated mean if provided, otherwise compute it
620    let mean = precalc_mean.unwrap_or_else(|| {
621        let sum: f64 = if len < PARALLEL_THRESHOLD {
622            // Iterator sum enables auto-vectorization (SIMD) by the compiler
623            data.iter()
624                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
625                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
626                .sum()
627        } else {
628            data.par_iter()
629                // SAFETY: to_f64() always returns Some for standard numeric types
630                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
631                .sum()
632        };
633        sum / len as f64
634    });
635
636    // If mean is zero, Atkinson is undefined
637    if mean == 0.0 {
638        core::hint::cold_path();
639        return None;
640    }
641
642    // Handle special case: epsilon = 1 (uses geometric mean).
643    // Reached only when precalc_mean and/or precalc_geometric_sum was supplied;
644    // the fully-unsupplied case is handled by the fused fast path above.
645    if epsilon_is_one {
646        // A_1 = 1 - (geometric_mean / mean)
647        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
648            precalc
649        } else if len < PARALLEL_THRESHOLD {
650            let mut sum = 0.0;
651            for x in data {
652                // SAFETY: to_f64() always returns Some for standard numeric types
653                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
654                if val <= 0.0 {
655                    // Geometric mean undefined for non-positive values
656                    return None;
657                }
658                sum += val.ln();
659            }
660            sum
661        } else {
662            data.par_iter()
663                .map(|x| {
664                    // SAFETY: to_f64() always returns Some for standard numeric types
665                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
666                    if val <= 0.0 {
667                        return f64::NAN;
668                    }
669                    val.ln()
670                })
671                .sum()
672        };
673
674        if geometric_sum.is_nan() {
675            core::hint::cold_path();
676            return None;
677        }
678
679        let geometric_mean = (geometric_sum / len as f64).exp();
680        return Some(1.0 - geometric_mean / mean);
681    }
682
683    // General case: epsilon != 1
684    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
685    let exponent = 1.0 - epsilon;
686    // Hoist reciprocal: replace per-element division with multiplication in the hot loop.
687    let inv_mean = mean.recip();
688
689    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
690        let mut sum = 0.0;
691        for x in data {
692            // SAFETY: to_f64() always returns Some for standard numeric types
693            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
694            if val < 0.0 {
695                // Negative values with non-integer exponent are undefined
696                return None;
697            }
698            let ratio = val * inv_mean;
699            sum += ratio.powf(exponent);
700        }
701        sum
702    } else {
703        data.par_iter()
704            .map(|x| {
705                // SAFETY: to_f64() always returns Some for standard numeric types
706                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
707                if val < 0.0 {
708                    return f64::NAN;
709                }
710                let ratio = val * inv_mean;
711                ratio.powf(exponent)
712            })
713            .sum()
714    };
715
716    if sum_powered.is_nan() || sum_powered <= 0.0 {
717        core::hint::cold_path();
718        return None;
719    }
720
721    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
722    Some(atkinson)
723}
724
725/// Selection algorithm to find the k-th smallest element in O(n) average time.
726/// This is an implementation of quickselect algorithm.
727#[cfg(test)]
728fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
729where
730    T: PartialOrd,
731{
732    if data.is_empty() || k >= data.len() {
733        core::hint::cold_path();
734        return None;
735    }
736
737    let mut left = 0;
738    let mut right = data.len() - 1;
739
740    loop {
741        if left == right {
742            return Some(&data[left].0);
743        }
744
745        // Use median-of-three pivot selection for better performance
746        let pivot_idx = median_of_three_pivot(data, left, right);
747        let pivot_idx = partition(data, left, right, pivot_idx);
748
749        match k.cmp(&pivot_idx) {
750            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
751            std::cmp::Ordering::Less => right = pivot_idx - 1,
752            std::cmp::Ordering::Greater => left = pivot_idx + 1,
753        }
754    }
755}
756
757/// Select the median of three elements as pivot for better quickselect performance
758#[cfg(test)]
759fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
760where
761    T: PartialOrd,
762{
763    let mid = left + (right - left) / 2;
764
765    if data[left] <= data[mid] {
766        if data[mid] <= data[right] {
767            mid
768        } else if data[left] <= data[right] {
769            right
770        } else {
771            left
772        }
773    } else if data[left] <= data[right] {
774        left
775    } else if data[mid] <= data[right] {
776        right
777    } else {
778        mid
779    }
780}
781
782/// Partition function for quickselect
783#[cfg(test)]
784fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
785where
786    T: PartialOrd,
787{
788    // Move pivot to end
789    data.swap(pivot_idx, right);
790    let mut store_idx = left;
791
792    // Move all elements smaller than pivot to the left
793    // Cache pivot position for better cache locality (access data[right] directly each time)
794    for i in left..right {
795        // Safety: i, store_idx, and right are guaranteed to be in bounds
796        // Compare directly with pivot at data[right] - compiler should optimize this access
797        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
798            data.swap(i, store_idx);
799            store_idx += 1;
800        }
801    }
802
803    // Move pivot to its final place
804    data.swap(store_idx, right);
805    store_idx
806}
807
808// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
809// It divides data into quarters based on the length n = 4k + r where r is remainder.
810// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
811fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
812where
813    T: PartialOrd + ToPrimitive,
814{
815    let len = data.len();
816
817    // Early return for small arrays
818    match len {
819        0..=2 => {
820            core::hint::cold_path();
821            return None;
822        }
823        3 => {
824            return Some(
825                // SAFETY: We know these indices are valid because len == 3
826                unsafe {
827                    (
828                        data.get_unchecked(0).0.to_f64()?,
829                        data.get_unchecked(1).0.to_f64()?,
830                        data.get_unchecked(2).0.to_f64()?,
831                    )
832                },
833            );
834        }
835        _ => {}
836    }
837
838    // Calculate k and remainder in one division
839    let k = len / 4;
840    let remainder = len % 4;
841
842    // SAFETY: All index calculations below are guaranteed to be in bounds
843    // because we've verified len >= 4 above and k is len/4
844    unsafe {
845        Some(match remainder {
846            0 => {
847                // Let data = {x_i}_{i=0..4k} where k is positive integer.
848                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
849                // If we divide data into two parts {x_i < q2} as L and
850                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
851                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
852                // =============
853                // Simply put: Length is multiple of 4 (4k)
854                // q1 = (x_{k-1} + x_k) / 2
855                // q2 = (x_{2k-1} + x_{2k}) / 2
856                // q3 = (x_{3k-1} + x_{3k}) / 2
857                let q1 = f64::midpoint(
858                    data.get_unchecked(k - 1).0.to_f64()?,
859                    data.get_unchecked(k).0.to_f64()?,
860                );
861                let q2 = f64::midpoint(
862                    data.get_unchecked(2 * k - 1).0.to_f64()?,
863                    data.get_unchecked(2 * k).0.to_f64()?,
864                );
865                let q3 = f64::midpoint(
866                    data.get_unchecked(3 * k - 1).0.to_f64()?,
867                    data.get_unchecked(3 * k).0.to_f64()?,
868                );
869                (q1, q2, q3)
870            }
871            1 => {
872                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
873                // Median q2 = x_{2k}.
874                // If we divide data other than q2 into two parts {x_i < q2}
875                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
876                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
877                // =============
878                // Simply put: Length is 4k + 1
879                // q1 = (x_{k-1} + x_k) / 2
880                // q2 = x_{2k}
881                // q3 = (x_{3k} + x_{3k+1}) / 2
882                let q1 = f64::midpoint(
883                    data.get_unchecked(k - 1).0.to_f64()?,
884                    data.get_unchecked(k).0.to_f64()?,
885                );
886                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
887                let q3 = f64::midpoint(
888                    data.get_unchecked(3 * k).0.to_f64()?,
889                    data.get_unchecked(3 * k + 1).0.to_f64()?,
890                );
891                (q1, q2, q3)
892            }
893            2 => {
894                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
895                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
896                // If we divide data into two parts {x_i < q2} as L and
897                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
898                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
899                // =============
900                // Simply put: Length is 4k + 2
901                // q1 = x_k
902                // q2 = (x_{2k} + x_{2k+1}) / 2
903                // q3 = x_{3k+1}
904                let q1 = data.get_unchecked(k).0.to_f64()?;
905                let q2 = f64::midpoint(
906                    data.get_unchecked(2 * k).0.to_f64()?,
907                    data.get_unchecked(2 * k + 1).0.to_f64()?,
908                );
909                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
910                (q1, q2, q3)
911            }
912            _ => {
913                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
914                // Median q2 = x_{2k+1}.
915                // If we divide data other than q2 into two parts {x_i < q2}
916                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
917                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
918                // =============
919                // Simply put: Length is 4k + 3
920                // q1 = x_k
921                // q2 = x_{2k+1}
922                // q3 = x_{3k+2}
923                let q1 = data.get_unchecked(k).0.to_f64()?;
924                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
925                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
926                (q1, q2, q3)
927            }
928        })
929    }
930}
931
932/// Zero-copy quartiles computation using index-based selection.
933/// This avoids copying data by working with an array of indices.
934///
935/// Uses `select_nth_unstable_by` on the indices array, which partitions in-place.
936/// After selecting position p, elements at indices [0..p] are <= the p-th element
937/// and elements at [p+1..] are >= it. By selecting positions in ascending order,
938/// each subsequent selection only needs to search within the right partition,
939/// avoiding redundant O(n) resets.
940fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
941where
942    T: PartialOrd + ToPrimitive,
943{
944    let len = data.len();
945
946    // Early return for small arrays
947    match len {
948        0..=2 => {
949            core::hint::cold_path();
950            return None;
951        }
952        3 => {
953            let mut indices: Vec<usize> = (0..3).collect();
954            let cmp = |a: &usize, b: &usize| {
955                data[*a]
956                    .partial_cmp(&data[*b])
957                    .unwrap_or(std::cmp::Ordering::Less)
958            };
959            indices.sort_unstable_by(cmp);
960            let min_val = data[indices[0]].0.to_f64()?;
961            let med_val = data[indices[1]].0.to_f64()?;
962            let max_val = data[indices[2]].0.to_f64()?;
963            return Some((min_val, med_val, max_val));
964        }
965        _ => {}
966    }
967
968    let k = len / 4;
969    let remainder = len % 4;
970
971    let mut indices: Vec<usize> = (0..len).collect();
972    let cmp = |a: &usize, b: &usize| {
973        data[*a]
974            .partial_cmp(&data[*b])
975            .unwrap_or(std::cmp::Ordering::Less)
976    };
977
978    // Collect the unique positions we need in ascending order.
979    // By selecting in ascending order, each select_nth_unstable_by partitions
980    // the array so subsequent selections operate on progressively smaller slices.
981    // We deduplicate because adjacent quartile boundaries can overlap for small k.
982    let raw_positions: Vec<usize> = match remainder {
983        0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
984        1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
985        2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
986        _ => vec![k, 2 * k + 1, 3 * k + 2],
987    };
988
989    let mut unique_positions = raw_positions.clone();
990    unique_positions.dedup();
991
992    // Select each unique position in ascending order, narrowing the search range
993    let mut start = 0;
994    for &pos in &unique_positions {
995        indices[start..].select_nth_unstable_by(pos - start, &cmp);
996        start = pos + 1;
997    }
998
999    // Now read all needed values (including duplicates) from the partitioned indices
1000    let values: Vec<f64> = raw_positions
1001        .iter()
1002        .map(|&pos| data[indices[pos]].0.to_f64())
1003        .collect::<Option<Vec<_>>>()?;
1004
1005    match remainder {
1006        0 => {
1007            let q1 = f64::midpoint(values[0], values[1]);
1008            let q2 = f64::midpoint(values[2], values[3]);
1009            let q3 = f64::midpoint(values[4], values[5]);
1010            Some((q1, q2, q3))
1011        }
1012        1 => {
1013            let q1 = f64::midpoint(values[0], values[1]);
1014            let q2 = values[2];
1015            let q3 = f64::midpoint(values[3], values[4]);
1016            Some((q1, q2, q3))
1017        }
1018        2 => {
1019            let q1 = values[0];
1020            let q2 = f64::midpoint(values[1], values[2]);
1021            let q3 = values[3];
1022            Some((q1, q2, q3))
1023        }
1024        _ => Some((values[0], values[1], values[2])),
1025    }
1026}
1027
1028fn mode_on_sorted<T, I>(it: I) -> Option<T>
1029where
1030    T: PartialOrd,
1031    I: Iterator<Item = T>,
1032{
1033    use std::cmp::Ordering;
1034
1035    // This approach to computing the mode works very nicely when the
1036    // number of samples is large and is close to its cardinality.
1037    // In other cases, a hashmap would be much better.
1038    // But really, how can we know this when given an arbitrary stream?
1039    // Might just switch to a hashmap to track frequencies. That would also
1040    // be generally useful for discovering the cardinality of a sample.
1041    let (mut mode, mut next) = (None, None);
1042    let (mut mode_count, mut next_count) = (0usize, 0usize);
1043    for x in it {
1044        if mode.as_ref() == Some(&x) {
1045            mode_count += 1;
1046        } else if next.as_ref() == Some(&x) {
1047            next_count += 1;
1048        } else {
1049            next = Some(x);
1050            next_count = 0;
1051        }
1052
1053        match next_count.cmp(&mode_count) {
1054            Ordering::Greater => {
1055                mode = next;
1056                mode_count = next_count;
1057                next = None;
1058                next_count = 0;
1059            }
1060            Ordering::Equal => {
1061                mode = None;
1062                mode_count = 0;
1063            }
1064            Ordering::Less => {}
1065        }
1066    }
1067    mode
1068}
1069
1070/// Computes both modes and antimodes from a sorted slice of values.
1071/// This version works with references to avoid unnecessary cloning.
1072///
1073/// # Arguments
1074///
1075/// * `data` - A sorted slice of values
1076///
1077/// # Notes
1078///
1079/// - Mode is the most frequently occurring value(s)
1080/// - Antimode is the least frequently occurring value(s)
1081/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1082/// - For empty slices, returns empty vectors and zero counts
1083/// - For single value slices, returns that value as the mode and empty antimode
1084/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1085///
1086/// # Returns
1087///
1088/// A tuple containing:
1089/// * Modes information: `(Vec<T>, usize, u32)` where:
1090///   - Vec<T>: Vector containing the mode values
1091///   - usize: Number of modes found
1092///   - u32: Frequency/count of the mode values
1093/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1094///   - Vec<T>: Vector containing up to 10 antimode values
1095///   - usize: Total number of antimodes
1096///   - u32: Frequency/count of the antimode values
1097#[allow(clippy::type_complexity)]
1098#[inline]
1099fn modes_and_antimodes_on_sorted_slice<T>(
1100    data: &[Partial<T>],
1101) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1102where
1103    T: PartialOrd + Clone,
1104{
1105    let size = data.len();
1106
1107    // Early return for empty slice
1108    if size == 0 {
1109        core::hint::cold_path();
1110        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1111    }
1112
1113    // Estimate capacity using integer square root of size
1114    let sqrt_size = size.isqrt();
1115    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1116
1117    let mut current_value = &data[0].0;
1118    let mut current_count = 1;
1119    let mut highest_count = 1;
1120    let mut lowest_count = u32::MAX;
1121
1122    // Count consecutive runs - optimized to reduce allocations
1123    for x in data.iter().skip(1) {
1124        if x.0 == *current_value {
1125            current_count += 1;
1126            highest_count = highest_count.max(current_count);
1127        } else {
1128            runs.push((current_value, current_count));
1129            lowest_count = lowest_count.min(current_count);
1130            current_value = &x.0;
1131            current_count = 1;
1132        }
1133    }
1134    runs.push((current_value, current_count));
1135    lowest_count = lowest_count.min(current_count);
1136
1137    // Early return if only one unique value
1138    if runs.len() == 1 {
1139        let (val, count) = runs.pop().unwrap();
1140        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1141    }
1142
1143    // Special case: if all values appear exactly once
1144    if highest_count == 1 {
1145        let antimodes_count = runs.len().min(10);
1146        let total_count = runs.len();
1147        let mut antimodes = Vec::with_capacity(antimodes_count);
1148        for (val, _) in runs.into_iter().take(antimodes_count) {
1149            antimodes.push(val.clone());
1150        }
1151        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1152        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1153    }
1154
1155    // Collect modes and antimodes directly in a single pass, cloning values immediately
1156    // instead of collecting indices first and then cloning in a second pass
1157    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1158    let estimated_antimodes = 10.min(runs.len());
1159
1160    let mut modes_result = Vec::with_capacity(estimated_modes);
1161    let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
1162    let mut mode_count = 0;
1163    let mut antimodes_count = 0;
1164    let mut antimodes_collected = 0_u32;
1165
1166    for (val, count) in &runs {
1167        if *count == highest_count {
1168            modes_result.push((*val).clone());
1169            mode_count += 1;
1170        }
1171        if *count == lowest_count {
1172            antimodes_count += 1;
1173            if antimodes_collected < 10 {
1174                antimodes_result.push((*val).clone());
1175                antimodes_collected += 1;
1176            }
1177        }
1178    }
1179
1180    (
1181        (modes_result, mode_count, highest_count),
1182        (antimodes_result, antimodes_count, lowest_count),
1183    )
1184}
1185
1186/// A commutative data structure for lazily sorted sequences of data.
1187///
1188/// The sort does not occur until statistics need to be computed.
1189///
1190/// Note that this works on types that do not define a total ordering like
1191/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1192/// is returned.
1193#[allow(clippy::unsafe_derive_deserialize)]
1194#[derive(Clone, Serialize, Deserialize)]
1195pub struct Unsorted<T> {
1196    /// Internal cache flag indicating whether `data` is currently sorted.
1197    /// This field is skipped during serialization and deserialization.
1198    #[serde(skip)]
1199    sorted: bool,
1200    data: Vec<Partial<T>>,
1201}
1202
1203// Manual PartialEq/Eq: ignore `sorted` cache flag so equality reflects
1204// logical contents only (two Unsorted with same data compare equal
1205// regardless of whether one has been sorted).
1206impl<T: PartialEq> PartialEq for Unsorted<T> {
1207    fn eq(&self, other: &Self) -> bool {
1208        self.data == other.data
1209    }
1210}
1211
1212impl<T: PartialEq> Eq for Unsorted<T> where Partial<T>: Eq {}
1213
1214impl<T: PartialOrd + Send> Unsorted<T> {
1215    /// Create initial empty state.
1216    #[inline]
1217    #[must_use]
1218    pub fn new() -> Unsorted<T> {
1219        Default::default()
1220    }
1221
1222    /// Add a new element to the set.
1223    #[allow(clippy::inline_always)]
1224    #[inline(always)]
1225    pub fn add(&mut self, v: T) {
1226        self.sorted = false;
1227        self.data.push(Partial(v));
1228    }
1229
1230    /// Return the number of data points.
1231    #[inline]
1232    #[must_use]
1233    pub const fn len(&self) -> usize {
1234        self.data.len()
1235    }
1236
1237    #[inline]
1238    #[must_use]
1239    pub const fn is_empty(&self) -> bool {
1240        self.data.is_empty()
1241    }
1242
1243    #[inline]
1244    fn sort(&mut self) {
1245        if !self.sorted {
1246            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1247            if self.data.len() < PARALLEL_THRESHOLD {
1248                self.data.sort_unstable();
1249            } else {
1250                self.data.par_sort_unstable();
1251            }
1252            self.sorted = true;
1253        }
1254    }
1255
1256    #[inline]
1257    const fn already_sorted(&mut self) {
1258        self.sorted = true;
1259    }
1260
1261    /// Add multiple elements efficiently
1262    #[inline]
1263    pub fn add_bulk(&mut self, values: Vec<T>) {
1264        self.sorted = false;
1265        self.data.reserve(values.len());
1266        self.data.extend(values.into_iter().map(Partial));
1267    }
1268
1269    /// Shrink capacity to fit current data
1270    #[inline]
1271    pub fn shrink_to_fit(&mut self) {
1272        self.data.shrink_to_fit();
1273    }
1274
1275    /// Create with specific capacity
1276    #[inline]
1277    #[must_use]
1278    pub fn with_capacity(capacity: usize) -> Self {
1279        Unsorted {
1280            sorted: true,
1281            data: Vec::with_capacity(capacity),
1282        }
1283    }
1284
1285    /// Add a value assuming it's greater than all existing values
1286    #[inline]
1287    pub fn push_ascending(&mut self, value: T) {
1288        if let Some(last) = self.data.last() {
1289            debug_assert!(last.0 <= value, "Value must be >= than last element");
1290        }
1291        self.data.push(Partial(value));
1292        // Data remains sorted
1293    }
1294}
1295
1296impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
1297    #[inline]
1298    /// Returns the cardinality of the data.
1299    /// Set `sorted` to `true` if the data is already sorted.
1300    /// Set `parallel_threshold` to `0` to force sequential processing.
1301    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1302    /// Set `parallel_threshold` to `2` to force parallel processing.
1303    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1304    /// greater than the default threshold of `10_000`.
1305    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1306        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1307        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1308
1309        let len = self.data.len();
1310        match len {
1311            0 => return 0,
1312            1 => return 1,
1313            _ => {}
1314        }
1315
1316        if sorted {
1317            self.already_sorted();
1318        } else {
1319            self.sort();
1320        }
1321
1322        let use_parallel = parallel_threshold != 0
1323            && (parallel_threshold == 1
1324                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1325
1326        if use_parallel {
1327            // Parallel processing using chunks via fold/reduce — no intermediate Vec.
1328            // Reduction state: (count, leftmost_first, rightmost_last). Associative:
1329            // combining (cL, fL, lL) with (cR, fR, lR) yields (cL+cR - [lL==fR], fL, lR).
1330            self.data
1331                .par_chunks(CHUNK_SIZE)
1332                .map(|chunk| {
1333                    // Count unique elements within this chunk
1334                    let mut count = u64::from(!chunk.is_empty());
1335                    for [a, b] in chunk.array_windows::<2>() {
1336                        if a != b {
1337                            count += 1;
1338                        }
1339                    }
1340                    (count, chunk.first(), chunk.last())
1341                })
1342                .reduce(
1343                    || (0u64, None, None),
1344                    |(cl, fl, ll), (cr, fr, lr)| match (ll, fr) {
1345                        // `None` endpoints only arise from the identity today, but summing
1346                        // counts in both arms keeps the combiner correct even if a future
1347                        // mapper returns a non-empty chunk with `None` endpoints.
1348                        (None, _) => (cl + cr, fr, lr),
1349                        (_, None) => (cl + cr, fl, ll),
1350                        (Some(l), Some(r)) => {
1351                            let adj = u64::from(l == r);
1352                            (cl + cr - adj, fl, lr)
1353                        },
1354                    },
1355                )
1356                .0
1357        } else {
1358            // Sequential processing
1359
1360            // the statement below is equivalent to:
1361            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1362            let mut count = u64::from(!self.data.is_empty());
1363
1364            for [a, b] in self.data.array_windows::<2>() {
1365                if a != b {
1366                    count += 1;
1367                }
1368            }
1369            count
1370        }
1371    }
1372}
1373
1374impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1375    /// Returns the mode of the data.
1376    #[inline]
1377    pub fn mode(&mut self) -> Option<T> {
1378        if self.data.is_empty() {
1379            return None;
1380        }
1381        self.sort();
1382        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1383    }
1384
1385    /// Returns the modes of the data.
1386    /// Note that there is also a `frequency::mode()` function that return one mode
1387    /// with the highest frequency. If there is a tie, it returns None.
1388    #[inline]
1389    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1390        if self.data.is_empty() {
1391            return (Vec::new(), 0, 0);
1392        }
1393        self.sort();
1394        modes_and_antimodes_on_sorted_slice(&self.data).0
1395    }
1396
1397    /// Returns the antimodes of the data.
1398    /// `antimodes_result` only returns the first 10 antimodes
1399    #[inline]
1400    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1401        if self.data.is_empty() {
1402            return (Vec::new(), 0, 0);
1403        }
1404        self.sort();
1405        modes_and_antimodes_on_sorted_slice(&self.data).1
1406    }
1407
1408    /// Returns the modes and antimodes of the data.
1409    /// `antimodes_result` only returns the first 10 antimodes
1410    #[allow(clippy::type_complexity)]
1411    #[inline]
1412    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1413        if self.data.is_empty() {
1414            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1415        }
1416        self.sort();
1417        modes_and_antimodes_on_sorted_slice(&self.data)
1418    }
1419}
1420
1421impl Unsorted<Vec<u8>> {
1422    /// Add a byte slice, converting to `Vec<u8>` internally.
1423    ///
1424    /// This is a convenience method that avoids requiring the caller to call
1425    /// `.to_vec()` before `add()`. The allocation still occurs internally,
1426    /// but the API is cleaner and opens the door for future optimizations
1427    /// (e.g., frequency-map deduplication for high-cardinality data).
1428    #[allow(clippy::inline_always)]
1429    #[inline(always)]
1430    pub fn add_bytes(&mut self, v: &[u8]) {
1431        self.sorted = false;
1432        self.data.push(Partial(v.to_vec()));
1433    }
1434}
1435
1436impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1437    /// Returns the median of the data.
1438    #[inline]
1439    pub fn median(&mut self) -> Option<f64> {
1440        if self.data.is_empty() {
1441            return None;
1442        }
1443        self.sort();
1444        median_on_sorted(&self.data)
1445    }
1446}
1447
1448impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1449    /// Returns the Median Absolute Deviation (MAD) of the data.
1450    #[inline]
1451    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1452        if self.data.is_empty() {
1453            return None;
1454        }
1455        if existing_median.is_none() {
1456            self.sort();
1457        }
1458        mad_on_sorted(&self.data, existing_median)
1459    }
1460}
1461
1462impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1463    /// Returns the quartiles of the data using the traditional sorting approach.
1464    ///
1465    /// This method sorts the data first and then computes quartiles.
1466    /// Time complexity: O(n log n)
1467    #[inline]
1468    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1469        if self.data.is_empty() {
1470            return None;
1471        }
1472        self.sort();
1473        quartiles_on_sorted(&self.data)
1474    }
1475}
1476
1477impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1478    /// Returns the Gini Coefficient of the data.
1479    ///
1480    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1481    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1482    /// Time complexity: O(n log n)
1483    #[inline]
1484    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1485        if self.data.is_empty() {
1486            return None;
1487        }
1488        self.sort();
1489        gini_on_sorted(&self.data, precalc_sum)
1490    }
1491
1492    /// Returns the kurtosis (excess kurtosis) of the data.
1493    ///
1494    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1495    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1496    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1497    /// Time complexity: O(n log n)
1498    #[inline]
1499    pub fn kurtosis(
1500        &mut self,
1501        precalc_mean: Option<f64>,
1502        precalc_variance: Option<f64>,
1503    ) -> Option<f64> {
1504        if self.data.is_empty() {
1505            return None;
1506        }
1507        self.sort();
1508        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1509    }
1510
1511    /// Returns the percentile rank of a value in the data.
1512    ///
1513    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1514    /// values, returns 0.0. If greater than all, returns 100.0.
1515    /// This method sorts the data first and then computes the percentile rank.
1516    /// Time complexity: O(n log n)
1517    #[inline]
1518    #[allow(clippy::needless_pass_by_value)]
1519    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1520    where
1521        V: PartialOrd + ToPrimitive,
1522    {
1523        if self.data.is_empty() {
1524            return None;
1525        }
1526        self.sort();
1527        percentile_rank_on_sorted(&self.data, &value)
1528    }
1529
1530    /// Returns the Atkinson Index of the data.
1531    ///
1532    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1533    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1534    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1535    /// This method sorts the data first and then computes the Atkinson index.
1536    /// Time complexity: O(n log n)
1537    ///
1538    /// # Arguments
1539    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1540    ///   - 0.0: No inequality aversion (returns 0)
1541    ///   - 0.5: Moderate aversion
1542    ///   - 1.0: Uses geometric mean (special case)
1543    ///   - 2.0: High aversion
1544    /// * `precalc_mean` - Optional pre-calculated mean
1545    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1546    #[inline]
1547    pub fn atkinson(
1548        &mut self,
1549        epsilon: f64,
1550        precalc_mean: Option<f64>,
1551        precalc_geometric_sum: Option<f64>,
1552    ) -> Option<f64> {
1553        if self.data.is_empty() {
1554            return None;
1555        }
1556        self.sort();
1557        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1558    }
1559}
1560
1561impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
1562    /// Returns the quartiles of the data using selection algorithm.
1563    ///
1564    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1565    /// in O(n) average time complexity instead of O(n log n) sorting.
1566    /// Requires T to implement Clone to create a working copy of the data.
1567    ///
1568    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1569    /// is often slower than the sorting-based approach for small to medium datasets due to:
1570    /// - Need to find multiple order statistics (3 separate quickselect calls)
1571    /// - Overhead of copying data to avoid mutation
1572    /// - Rayon's highly optimized parallel sorting
1573    #[inline]
1574    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1575        if self.data.is_empty() {
1576            return None;
1577        }
1578        // Use zero-copy approach (indices-based) to avoid cloning all elements
1579        quartiles_with_zero_copy_selection(&self.data)
1580    }
1581}
1582
1583impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1584    /// Returns the quartiles using zero-copy selection algorithm.
1585    ///
1586    /// This implementation avoids copying data by working with indices instead,
1587    /// providing better performance than the clone-based selection approach.
1588    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1589    #[inline]
1590    #[must_use]
1591    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1592        if self.data.is_empty() {
1593            return None;
1594        }
1595        quartiles_with_zero_copy_selection(&self.data)
1596    }
1597}
1598
1599impl<T: PartialOrd + Send> Commute for Unsorted<T> {
1600    #[inline]
1601    fn merge(&mut self, mut v: Unsorted<T>) {
1602        if v.is_empty() {
1603            return;
1604        }
1605
1606        self.sorted = false;
1607        // we use std::mem::take to avoid unnecessary allocations
1608        self.data.extend(std::mem::take(&mut v.data));
1609    }
1610}
1611
1612impl<T: PartialOrd> Default for Unsorted<T> {
1613    #[inline]
1614    fn default() -> Unsorted<T> {
1615        Unsorted {
1616            data: Vec::with_capacity(16),
1617            sorted: true, // empty is sorted
1618        }
1619    }
1620}
1621
1622impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
1623    #[inline]
1624    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1625        let mut v = Unsorted::new();
1626        v.extend(it);
1627        v
1628    }
1629}
1630
1631impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1632    #[inline]
1633    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1634        self.sorted = false;
1635        self.data.extend(it.into_iter().map(Partial));
1636    }
1637}
1638
1639fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1640where
1641    T: PartialOrd + Clone,
1642{
1643    let len = data.len();
1644
1645    // Early return for empty array or invalid percentiles
1646    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1647        return None;
1648    }
1649
1650    // Optimize: Check if percentiles are already sorted and unique
1651    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1652        // Single or empty percentile - no need to sort/dedup
1653        percentiles.to_vec()
1654    } else {
1655        // Check if already sorted and unique (common case)
1656        let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
1657
1658        if is_sorted_unique {
1659            // Already sorted and unique, use directly without cloning
1660            percentiles.to_vec()
1661        } else {
1662            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1663            let mut seen = [false; 101];
1664            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1665            for &p in percentiles {
1666                if !seen[p as usize] {
1667                    seen[p as usize] = true;
1668                    sorted_unique.push(p);
1669                }
1670            }
1671            sorted_unique.sort_unstable();
1672            sorted_unique
1673        }
1674    };
1675
1676    let mut results = Vec::with_capacity(unique_percentiles.len());
1677
1678    // SAFETY: All index calculations below are guaranteed to be in bounds
1679    // because we've verified len > 0 and the rank calculation ensures
1680    // the index is within bounds
1681    unsafe {
1682        for &p in &unique_percentiles {
1683            // Calculate the ordinal rank using nearest-rank method
1684            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1685            // n = ⌈(P/100) × N⌉
1686            #[allow(clippy::cast_sign_loss)]
1687            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1688
1689            // Convert to 0-based index
1690            let idx = rank.saturating_sub(1);
1691
1692            // Get the value at that rank and extract the inner value
1693            results.push(data.get_unchecked(idx).0.clone());
1694        }
1695    }
1696
1697    Some(results)
1698}
1699
1700impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1701    /// Returns the requested percentiles of the data.
1702    ///
1703    /// Uses the nearest-rank method to compute percentiles.
1704    /// Each returned value is an actual value from the dataset.
1705    ///
1706    /// # Arguments
1707    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1708    ///
1709    /// # Returns
1710    /// * `None` if the data is empty or if any percentile is > 100
1711    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1712    ///
1713    /// # Example
1714    /// ```
1715    /// use stats::Unsorted;
1716    /// let mut data = Unsorted::new();
1717    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1718    /// let percentiles = vec![25, 50, 75];
1719    /// let results = data.custom_percentiles(&percentiles).unwrap();
1720    /// assert_eq!(results, vec![3, 5, 8]);
1721    /// ```
1722    #[inline]
1723    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1724        if self.data.is_empty() {
1725            return None;
1726        }
1727        self.sort();
1728        custom_percentiles_on_sorted(&self.data, percentiles)
1729    }
1730}
1731
1732#[cfg(test)]
1733mod test {
1734    use super::*;
1735
1736    #[test]
1737    fn test_cardinality_empty() {
1738        let mut unsorted: Unsorted<i32> = Unsorted::new();
1739        assert_eq!(unsorted.cardinality(false, 1), 0);
1740    }
1741
1742    #[test]
1743    fn test_cardinality_single_element() {
1744        let mut unsorted = Unsorted::new();
1745        unsorted.add(5);
1746        assert_eq!(unsorted.cardinality(false, 1), 1);
1747    }
1748
1749    #[test]
1750    fn test_cardinality_unique_elements() {
1751        let mut unsorted = Unsorted::new();
1752        unsorted.extend(vec![1, 2, 3, 4, 5]);
1753        assert_eq!(unsorted.cardinality(false, 1), 5);
1754    }
1755
1756    #[test]
1757    fn test_cardinality_duplicate_elements() {
1758        let mut unsorted = Unsorted::new();
1759        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1760        assert_eq!(unsorted.cardinality(false, 1), 4);
1761    }
1762
1763    #[test]
1764    fn test_cardinality_all_same() {
1765        let mut unsorted = Unsorted::new();
1766        unsorted.extend(vec![1; 100]);
1767        assert_eq!(unsorted.cardinality(false, 1), 1);
1768    }
1769
1770    #[test]
1771    fn test_cardinality_large_range() {
1772        let mut unsorted = Unsorted::new();
1773        unsorted.extend(0..1_000_000);
1774        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1775    }
1776
1777    #[test]
1778    fn test_cardinality_large_range_sequential() {
1779        let mut unsorted = Unsorted::new();
1780        unsorted.extend(0..1_000_000);
1781        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1782    }
1783
1784    #[test]
1785    fn test_cardinality_presorted() {
1786        let mut unsorted = Unsorted::new();
1787        unsorted.extend(vec![1, 2, 3, 4, 5]);
1788        unsorted.sort();
1789        assert_eq!(unsorted.cardinality(true, 1), 5);
1790    }
1791
1792    #[test]
1793    fn test_cardinality_float() {
1794        let mut unsorted = Unsorted::new();
1795        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1796        assert_eq!(unsorted.cardinality(false, 1), 4);
1797    }
1798
1799    #[test]
1800    fn test_cardinality_string() {
1801        let mut unsorted = Unsorted::new();
1802        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1803        assert_eq!(unsorted.cardinality(false, 1), 3);
1804    }
1805
1806    #[test]
1807    fn test_quartiles_selection_vs_sorted() {
1808        // Test that selection-based quartiles gives same results as sorting-based
1809        let test_cases = vec![
1810            vec![3, 5, 7, 9],
1811            vec![3, 5, 7],
1812            vec![1, 2, 7, 11],
1813            vec![3, 5, 7, 9, 12],
1814            vec![2, 2, 3, 8, 10],
1815            vec![3, 5, 7, 9, 12, 20],
1816            vec![0, 2, 4, 8, 10, 11],
1817            vec![3, 5, 7, 9, 12, 20, 21],
1818            vec![1, 5, 6, 6, 7, 10, 19],
1819        ];
1820
1821        for test_case in test_cases {
1822            let mut unsorted1 = Unsorted::new();
1823            let mut unsorted2 = Unsorted::new();
1824            let mut unsorted3 = Unsorted::new();
1825            unsorted1.extend(test_case.clone());
1826            unsorted2.extend(test_case.clone());
1827            unsorted3.extend(test_case.clone());
1828
1829            let result_sorted = unsorted1.quartiles();
1830            let result_selection = unsorted2.quartiles_with_selection();
1831            let result_zero_copy = unsorted3.quartiles_zero_copy();
1832
1833            assert_eq!(
1834                result_sorted, result_selection,
1835                "Selection mismatch for test case: {:?}",
1836                test_case
1837            );
1838            assert_eq!(
1839                result_sorted, result_zero_copy,
1840                "Zero-copy mismatch for test case: {:?}",
1841                test_case
1842            );
1843        }
1844    }
1845
1846    #[test]
1847    fn test_quartiles_with_selection_small() {
1848        // Test edge cases for selection-based quartiles
1849        let mut unsorted: Unsorted<i32> = Unsorted::new();
1850        assert_eq!(unsorted.quartiles_with_selection(), None);
1851
1852        let mut unsorted = Unsorted::new();
1853        unsorted.extend(vec![1, 2]);
1854        assert_eq!(unsorted.quartiles_with_selection(), None);
1855
1856        let mut unsorted = Unsorted::new();
1857        unsorted.extend(vec![1, 2, 3]);
1858        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1859    }
1860
1861    #[test]
1862    fn test_quickselect() {
1863        let data = vec![
1864            Partial(3),
1865            Partial(1),
1866            Partial(4),
1867            Partial(1),
1868            Partial(5),
1869            Partial(9),
1870            Partial(2),
1871            Partial(6),
1872        ];
1873
1874        // Test finding different positions
1875        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1876        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1877        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1878
1879        // Test edge cases
1880        let mut empty: Vec<Partial<i32>> = vec![];
1881        assert_eq!(quickselect(&mut empty, 0), None);
1882
1883        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1884        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1885    }
1886
1887    #[test]
1888    fn median_stream() {
1889        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1890        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1891    }
1892
1893    #[test]
1894    fn mad_stream() {
1895        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1896        assert_eq!(
1897            mad(
1898                vec![
1899                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1900                    50, 61, 39, 88, 5, 13, 64
1901                ]
1902                .into_iter(),
1903                None
1904            ),
1905            Some(16.0)
1906        );
1907    }
1908
1909    #[test]
1910    fn mad_stream_precalc_median() {
1911        let data = vec![3usize, 5, 7, 9].into_iter();
1912        let median1 = median(data.clone());
1913        assert_eq!(mad(data, median1), Some(2.0));
1914
1915        let data2 = vec![
1916            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1917            39, 88, 5, 13, 64,
1918        ]
1919        .into_iter();
1920        let median2 = median(data2.clone());
1921        assert_eq!(mad(data2, median2), Some(16.0));
1922    }
1923
1924    #[test]
1925    fn mode_stream() {
1926        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1927        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1928        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1929        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1930        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1931    }
1932
1933    #[test]
1934    fn median_floats() {
1935        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1936        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1937    }
1938
1939    #[test]
1940    fn mode_floats() {
1941        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1942        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1943        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1944        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1945        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1946    }
1947
1948    #[test]
1949    fn modes_stream() {
1950        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1951        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1952        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1953        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1954        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1955        let vec: Vec<u32> = vec![];
1956        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1957    }
1958
1959    #[test]
1960    fn modes_floats() {
1961        assert_eq!(
1962            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1963            (vec![], 0, 0)
1964        );
1965        assert_eq!(
1966            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1967            (vec![3.0], 1, 4)
1968        );
1969        assert_eq!(
1970            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1971            (vec![3.0, 4.0], 2, 2)
1972        );
1973        assert_eq!(
1974            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1975            (vec![1.0, 3.0], 2, 2)
1976        );
1977    }
1978
1979    #[test]
1980    fn antimodes_stream() {
1981        assert_eq!(
1982            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1983            (vec![3, 5, 7, 9], 4, 1)
1984        );
1985        assert_eq!(
1986            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1987            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1988        );
1989        assert_eq!(
1990            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1991            (vec![1], 1, 1)
1992        );
1993        assert_eq!(
1994            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1995            (vec![3, 4], 2, 2)
1996        );
1997        assert_eq!(
1998            antimodes(
1999                vec![
2000                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
2001                    14, 14, 15, 15
2002                ]
2003                .into_iter()
2004            ),
2005            // we only show the first 10 of the 13 antimodes
2006            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2007        );
2008        assert_eq!(
2009            antimodes(
2010                vec![
2011                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2012                    14, 14, 15, 15
2013                ]
2014                .into_iter()
2015            ),
2016            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2017        );
2018        assert_eq!(
2019            antimodes(vec![3usize, 3, 3, 4].into_iter()),
2020            (vec![4], 1, 1)
2021        );
2022        assert_eq!(
2023            antimodes(vec![4usize, 3, 3, 3].into_iter()),
2024            (vec![4], 1, 1)
2025        );
2026        assert_eq!(
2027            antimodes(vec![1usize, 1, 2, 2].into_iter()),
2028            (vec![1, 2], 2, 2)
2029        );
2030        let vec: Vec<u32> = vec![];
2031        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2032    }
2033
2034    #[test]
2035    fn antimodes_floats() {
2036        assert_eq!(
2037            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2038            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2039        );
2040        assert_eq!(
2041            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2042            (vec![], 0, 0)
2043        );
2044        assert_eq!(
2045            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2046            (vec![3.0, 4.0], 2, 2)
2047        );
2048        assert_eq!(
2049            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2050            (vec![2.0], 1, 1)
2051        );
2052    }
2053
2054    #[test]
2055    fn test_custom_percentiles() {
2056        // Test with integers
2057        let mut unsorted: Unsorted<i32> = Unsorted::new();
2058        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2059
2060        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2061        assert_eq!(result, vec![3, 6, 9]);
2062
2063        // Test with strings
2064        let mut str_data = Unsorted::new();
2065        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2066        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2067        assert_eq!(result, vec!["a", "b", "c", "d"]);
2068
2069        // Test with chars
2070        let mut char_data = Unsorted::new();
2071        char_data.extend('a'..='e');
2072        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2073        assert_eq!(result, vec!['b', 'c', 'd']);
2074
2075        // Test with floats
2076        let mut float_data = Unsorted::new();
2077        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2078        let result = float_data
2079            .custom_percentiles(&[10, 30, 50, 70, 90])
2080            .unwrap();
2081        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2082
2083        // Test with empty percentiles array
2084        let result = float_data.custom_percentiles(&[]).unwrap();
2085        assert_eq!(result, Vec::<f64>::new());
2086
2087        // Test with duplicate percentiles
2088        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2089        assert_eq!(result, vec![5.5]);
2090
2091        // Test with extreme percentiles
2092        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2093        assert_eq!(result, vec![1.1, 9.9]);
2094
2095        // Test with unsorted percentiles
2096        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2097        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2098
2099        // Test with single element
2100        let mut single = Unsorted::new();
2101        single.add(42);
2102        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2103        assert_eq!(result, vec![42, 42, 42]);
2104    }
2105
2106    #[test]
2107    fn quartiles_stream() {
2108        assert_eq!(
2109            quartiles(vec![3usize, 5, 7].into_iter()),
2110            Some((3., 5., 7.))
2111        );
2112        assert_eq!(
2113            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2114            Some((4., 6., 8.))
2115        );
2116        assert_eq!(
2117            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2118            Some((1.5, 4.5, 9.))
2119        );
2120        assert_eq!(
2121            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2122            Some((4., 7., 10.5))
2123        );
2124        assert_eq!(
2125            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2126            Some((2., 3., 9.))
2127        );
2128        assert_eq!(
2129            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2130            Some((5., 8., 12.))
2131        );
2132        assert_eq!(
2133            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2134            Some((2., 6., 10.))
2135        );
2136        assert_eq!(
2137            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2138            Some((5., 9., 20.))
2139        );
2140        assert_eq!(
2141            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2142            Some((5., 6., 10.))
2143        );
2144    }
2145
2146    #[test]
2147    fn quartiles_floats() {
2148        assert_eq!(
2149            quartiles(vec![3_f64, 5., 7.].into_iter()),
2150            Some((3., 5., 7.))
2151        );
2152        assert_eq!(
2153            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2154            Some((4., 6., 8.))
2155        );
2156        assert_eq!(
2157            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2158            Some((4., 7., 10.5))
2159        );
2160        assert_eq!(
2161            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2162            Some((5., 8., 12.))
2163        );
2164        assert_eq!(
2165            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2166            Some((5., 9., 20.))
2167        );
2168    }
2169
2170    #[test]
2171    fn test_quartiles_zero_copy_small() {
2172        // Test edge cases for zero-copy quartiles
2173        let unsorted: Unsorted<i32> = Unsorted::new();
2174        assert_eq!(unsorted.quartiles_zero_copy(), None);
2175
2176        let mut unsorted = Unsorted::new();
2177        unsorted.extend(vec![1, 2]);
2178        assert_eq!(unsorted.quartiles_zero_copy(), None);
2179
2180        let mut unsorted = Unsorted::new();
2181        unsorted.extend(vec![1, 2, 3]);
2182        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2183
2184        // Test larger case
2185        let mut unsorted = Unsorted::new();
2186        unsorted.extend(vec![3, 5, 7, 9]);
2187        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2188    }
2189
2190    #[test]
2191    fn gini_empty() {
2192        let mut unsorted: Unsorted<i32> = Unsorted::new();
2193        assert_eq!(unsorted.gini(None), None);
2194        let empty_vec: Vec<i32> = vec![];
2195        assert_eq!(gini(empty_vec.into_iter(), None), None);
2196    }
2197
2198    #[test]
2199    fn gini_single_element() {
2200        let mut unsorted = Unsorted::new();
2201        unsorted.add(5);
2202        assert_eq!(unsorted.gini(None), Some(0.0));
2203        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2204    }
2205
2206    #[test]
2207    fn gini_perfect_equality() {
2208        // All values are the same - perfect equality, Gini = 0
2209        let mut unsorted = Unsorted::new();
2210        unsorted.extend(vec![10, 10, 10, 10, 10]);
2211        let result = unsorted.gini(None).unwrap();
2212        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2213
2214        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2215    }
2216
2217    #[test]
2218    fn gini_perfect_inequality() {
2219        // One value has everything, others have zero - perfect inequality
2220        // For [0, 0, 0, 0, 100], Gini should be close to 1
2221        let mut unsorted = Unsorted::new();
2222        unsorted.extend(vec![0, 0, 0, 0, 100]);
2223        let result = unsorted.gini(None).unwrap();
2224        // Perfect inequality should give Gini close to 1
2225        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2226        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2227    }
2228
2229    #[test]
2230    fn gini_stream() {
2231        // Test with known values
2232        // For [1, 2, 3, 4, 5]:
2233        // sum = 15
2234        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2235        // n = 5
2236        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2237        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2238        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2239        assert!(
2240            (result - expected).abs() < 1e-10,
2241            "Expected {}, got {}",
2242            expected,
2243            result
2244        );
2245    }
2246
2247    #[test]
2248    fn gini_floats() {
2249        let mut unsorted = Unsorted::new();
2250        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2251        let result = unsorted.gini(None).unwrap();
2252        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2253        assert!((result - expected).abs() < 1e-10);
2254
2255        assert!(
2256            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2257                < 1e-10
2258        );
2259    }
2260
2261    #[test]
2262    fn gini_all_zeros() {
2263        // All zeros - sum is zero, Gini is undefined
2264        let mut unsorted = Unsorted::new();
2265        unsorted.extend(vec![0, 0, 0, 0]);
2266        assert_eq!(unsorted.gini(None), None);
2267        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2268    }
2269
2270    #[test]
2271    fn gini_negative_values() {
2272        // Test with negative values (mathematically valid)
2273        let mut unsorted = Unsorted::new();
2274        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2275        let result = unsorted.gini(None);
2276        // Sum is 0, so Gini is undefined
2277        assert_eq!(result, None);
2278
2279        // Test with negative values that don't sum to zero
2280        let mut unsorted = Unsorted::new();
2281        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2282        let result = unsorted.gini(None);
2283        // Sum is 0, so Gini is undefined
2284        assert_eq!(result, None);
2285
2286        // Test with values containing negatives that sum to non-zero
2287        // Gini is undefined for negative values, should return None
2288        let mut unsorted = Unsorted::new();
2289        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2290        let result = unsorted.gini(None);
2291        assert_eq!(result, None);
2292    }
2293
2294    #[test]
2295    fn gini_known_cases() {
2296        // Test case: [1, 1, 1, 1, 1] - perfect equality
2297        let mut unsorted = Unsorted::new();
2298        unsorted.extend(vec![1, 1, 1, 1, 1]);
2299        let result = unsorted.gini(None).unwrap();
2300        assert!((result - 0.0).abs() < 1e-10);
2301
2302        // Test case: [0, 0, 0, 0, 1] - high inequality
2303        let mut unsorted = Unsorted::new();
2304        unsorted.extend(vec![0, 0, 0, 0, 1]);
2305        let result = unsorted.gini(None).unwrap();
2306        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2307        assert!((result - 0.8).abs() < 1e-10);
2308
2309        // Test case: [1, 2, 3] - moderate inequality
2310        let mut unsorted = Unsorted::new();
2311        unsorted.extend(vec![1, 2, 3]);
2312        let result = unsorted.gini(None).unwrap();
2313        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2314        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2315        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2316        assert!((result - expected).abs() < 1e-10);
2317    }
2318
2319    #[test]
2320    fn gini_precalc_sum() {
2321        // Test with pre-calculated sum
2322        let mut unsorted = Unsorted::new();
2323        unsorted.extend(vec![1, 2, 3, 4, 5]);
2324        let precalc_sum = Some(15.0);
2325        let result = unsorted.gini(precalc_sum).unwrap();
2326        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2327        assert!((result - expected).abs() < 1e-10);
2328
2329        // Test that pre-calculated sum gives same result
2330        let mut unsorted2 = Unsorted::new();
2331        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2332        let result2 = unsorted2.gini(None).unwrap();
2333        assert!((result - result2).abs() < 1e-10);
2334    }
2335
2336    #[test]
2337    fn gini_large_dataset() {
2338        // Test with larger dataset to exercise parallel path
2339        let data: Vec<i32> = (1..=1000).collect();
2340        let result = gini(data.iter().copied(), None);
2341        assert!(result.is_some());
2342        let gini_val = result.unwrap();
2343        // For uniform distribution, Gini should be positive but not too high
2344        assert!(gini_val > 0.0 && gini_val < 0.5);
2345    }
2346
2347    #[test]
2348    fn gini_unsorted_vs_sorted() {
2349        // Test that sorting doesn't affect result
2350        let mut unsorted1 = Unsorted::new();
2351        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2352        let result1 = unsorted1.gini(None).unwrap();
2353
2354        let mut unsorted2 = Unsorted::new();
2355        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2356        let result2 = unsorted2.gini(None).unwrap();
2357
2358        assert!((result1 - result2).abs() < 1e-10);
2359    }
2360
2361    #[test]
2362    fn gini_small_values() {
2363        // Test with very small values
2364        let mut unsorted = Unsorted::new();
2365        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2366        let result = unsorted.gini(None);
2367        assert!(result.is_some());
2368        // Should be same as [1, 2, 3, 4, 5] scaled down
2369        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2370        assert!((result.unwrap() - expected).abs() < 1e-10);
2371    }
2372
2373    #[test]
2374    fn gini_large_values() {
2375        // Test with large values
2376        let mut unsorted = Unsorted::new();
2377        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2378        let result = unsorted.gini(None);
2379        assert!(result.is_some());
2380        // Should be same as [1, 2, 3, 4, 5] scaled up
2381        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2382        assert!((result.unwrap() - expected).abs() < 1e-10);
2383    }
2384
2385    #[test]
2386    fn gini_two_elements() {
2387        // Test with exactly 2 elements
2388        let mut unsorted = Unsorted::new();
2389        unsorted.extend(vec![1, 2]);
2390        let result = unsorted.gini(None).unwrap();
2391        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2392        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2393        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2394        assert!((result - expected).abs() < 1e-10);
2395    }
2396
2397    #[test]
2398    fn gini_precalc_sum_zero() {
2399        // Test with pre-calculated sum of zero (should return None)
2400        let mut unsorted = Unsorted::new();
2401        unsorted.extend(vec![1, 2, 3, 4, 5]);
2402        let result = unsorted.gini(Some(0.0));
2403        assert_eq!(result, None);
2404    }
2405
2406    #[test]
2407    fn gini_precalc_sum_negative() {
2408        // Gini is undefined for negative values, should return None
2409        let mut unsorted = Unsorted::new();
2410        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2411        let result = unsorted.gini(None);
2412        assert_eq!(result, None);
2413
2414        // Negative precalculated sum should also return None
2415        let mut unsorted = Unsorted::new();
2416        unsorted.extend(vec![1, 2, 3]);
2417        let result = unsorted.gini(Some(-5.0));
2418        assert_eq!(result, None);
2419    }
2420
2421    #[test]
2422    fn gini_different_types() {
2423        // Test with different integer types
2424        let mut unsorted_u32 = Unsorted::new();
2425        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2426        let result_u32 = unsorted_u32.gini(None).unwrap();
2427
2428        let mut unsorted_i64 = Unsorted::new();
2429        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2430        let result_i64 = unsorted_i64.gini(None).unwrap();
2431
2432        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2433        assert!((result_u32 - expected).abs() < 1e-10);
2434        assert!((result_i64 - expected).abs() < 1e-10);
2435    }
2436
2437    #[test]
2438    fn gini_extreme_inequality() {
2439        // Test with extreme inequality: one very large value, many zeros
2440        let mut unsorted = Unsorted::new();
2441        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2442        let result = unsorted.gini(None).unwrap();
2443        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2444        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2445        assert!((result - 0.9).abs() < 1e-10);
2446    }
2447
2448    #[test]
2449    fn gini_duplicate_values() {
2450        // Test with many duplicate values
2451        let mut unsorted = Unsorted::new();
2452        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2453        let result = unsorted.gini(None);
2454        assert!(result.is_some());
2455        // Should be between 0 and 1
2456        let gini_val = result.unwrap();
2457        assert!((0.0..=1.0).contains(&gini_val));
2458    }
2459
2460    #[test]
2461    fn kurtosis_empty() {
2462        let mut unsorted: Unsorted<i32> = Unsorted::new();
2463        assert_eq!(unsorted.kurtosis(None, None), None);
2464        let empty_vec: Vec<i32> = vec![];
2465        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2466    }
2467
2468    #[test]
2469    fn kurtosis_small() {
2470        // Need at least 4 elements
2471        let mut unsorted = Unsorted::new();
2472        unsorted.extend(vec![1, 2]);
2473        assert_eq!(unsorted.kurtosis(None, None), None);
2474
2475        let mut unsorted = Unsorted::new();
2476        unsorted.extend(vec![1, 2, 3]);
2477        assert_eq!(unsorted.kurtosis(None, None), None);
2478    }
2479
2480    #[test]
2481    fn kurtosis_normal_distribution() {
2482        // Normal distribution should have kurtosis close to 0
2483        let mut unsorted = Unsorted::new();
2484        unsorted.extend(vec![1, 2, 3, 4, 5]);
2485        let result = unsorted.kurtosis(None, None);
2486        assert!(result.is_some());
2487        // For small samples, kurtosis can vary significantly
2488    }
2489
2490    #[test]
2491    fn kurtosis_all_same() {
2492        // All same values - variance is 0, kurtosis undefined
2493        let mut unsorted = Unsorted::new();
2494        unsorted.extend(vec![5, 5, 5, 5]);
2495        assert_eq!(unsorted.kurtosis(None, None), None);
2496    }
2497
2498    #[test]
2499    fn kurtosis_stream() {
2500        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2501        assert!(result.is_some());
2502    }
2503
2504    #[test]
2505    fn kurtosis_precalc_mean_variance() {
2506        // Test with pre-calculated mean and variance
2507        let mut unsorted = Unsorted::new();
2508        unsorted.extend(vec![1, 2, 3, 4, 5]);
2509
2510        // Calculate mean and variance manually
2511        let mean = 3.0f64;
2512        let variance = ((1.0f64 - 3.0).powi(2)
2513            + (2.0f64 - 3.0).powi(2)
2514            + (3.0f64 - 3.0).powi(2)
2515            + (4.0f64 - 3.0).powi(2)
2516            + (5.0f64 - 3.0).powi(2))
2517            / 5.0;
2518
2519        let result = unsorted.kurtosis(Some(mean), Some(variance));
2520        assert!(result.is_some());
2521
2522        // Test that pre-calculated values give same result
2523        let mut unsorted2 = Unsorted::new();
2524        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2525        let result2 = unsorted2.kurtosis(None, None);
2526        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2527    }
2528
2529    #[test]
2530    fn kurtosis_precalc_mean_only() {
2531        // Test with pre-calculated mean only
2532        let mut unsorted = Unsorted::new();
2533        unsorted.extend(vec![1, 2, 3, 4, 5]);
2534        let mean = 3.0f64;
2535
2536        let result = unsorted.kurtosis(Some(mean), None);
2537        assert!(result.is_some());
2538
2539        // Test that pre-calculated mean gives same result
2540        let mut unsorted2 = Unsorted::new();
2541        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2542        let result2 = unsorted2.kurtosis(None, None);
2543        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2544    }
2545
2546    #[test]
2547    fn kurtosis_precalc_variance_only() {
2548        // Test with pre-calculated variance only
2549        let mut unsorted = Unsorted::new();
2550        unsorted.extend(vec![1, 2, 3, 4, 5]);
2551        let variance = ((1.0f64 - 3.0).powi(2)
2552            + (2.0f64 - 3.0).powi(2)
2553            + (3.0f64 - 3.0).powi(2)
2554            + (4.0f64 - 3.0).powi(2)
2555            + (5.0f64 - 3.0).powi(2))
2556            / 5.0;
2557
2558        let result = unsorted.kurtosis(None, Some(variance));
2559        assert!(result.is_some());
2560
2561        // Test that pre-calculated variance gives same result
2562        let mut unsorted2 = Unsorted::new();
2563        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2564        let result2 = unsorted2.kurtosis(None, None);
2565        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2566    }
2567
2568    #[test]
2569    fn kurtosis_exact_calculation() {
2570        // Test with exact calculation for [1, 2, 3, 4]
2571        // Mean = 2.5
2572        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2573        // Variance^2 = 1.5625
2574        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2575        // n = 4
2576        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2577        let mut unsorted = Unsorted::new();
2578        unsorted.extend(vec![1, 2, 3, 4]);
2579        let result = unsorted.kurtosis(None, None).unwrap();
2580        // For small samples, kurtosis can be very high
2581        assert!(result.is_finite());
2582    }
2583
2584    #[test]
2585    fn kurtosis_uniform_distribution() {
2586        // Uniform distribution should have negative excess kurtosis
2587        let mut unsorted = Unsorted::new();
2588        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2589        let result = unsorted.kurtosis(None, None).unwrap();
2590        // Uniform distribution has excess kurtosis = -1.2
2591        // But for small samples, it can vary significantly
2592        assert!(result.is_finite());
2593    }
2594
2595    #[test]
2596    fn kurtosis_large_dataset() {
2597        // Test with larger dataset to exercise parallel path
2598        let data: Vec<i32> = (1..=1000).collect();
2599        let result = kurtosis(data.iter().copied(), None, None);
2600        assert!(result.is_some());
2601        let kurt_val = result.unwrap();
2602        assert!(kurt_val.is_finite());
2603    }
2604
2605    #[test]
2606    fn kurtosis_unsorted_vs_sorted() {
2607        // Test that sorting doesn't affect result
2608        let mut unsorted1 = Unsorted::new();
2609        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2610        let result1 = unsorted1.kurtosis(None, None).unwrap();
2611
2612        let mut unsorted2 = Unsorted::new();
2613        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2614        let result2 = unsorted2.kurtosis(None, None).unwrap();
2615
2616        assert!((result1 - result2).abs() < 1e-10);
2617    }
2618
2619    #[test]
2620    fn kurtosis_minimum_size() {
2621        // Test with exactly 4 elements (minimum required)
2622        let mut unsorted = Unsorted::new();
2623        unsorted.extend(vec![1, 2, 3, 4]);
2624        let result = unsorted.kurtosis(None, None);
2625        assert!(result.is_some());
2626        assert!(result.unwrap().is_finite());
2627    }
2628
2629    #[test]
2630    fn kurtosis_heavy_tailed() {
2631        // Test with heavy-tailed distribution (outliers)
2632        let mut unsorted = Unsorted::new();
2633        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2634        let result = unsorted.kurtosis(None, None).unwrap();
2635        // Heavy tails should give positive excess kurtosis
2636        assert!(result.is_finite());
2637        // With an outlier, kurtosis should be positive
2638        assert!(result > -10.0); // Allow some variance but should be reasonable
2639    }
2640
2641    #[test]
2642    fn kurtosis_light_tailed() {
2643        // Test with light-tailed distribution (values close together)
2644        let mut unsorted = Unsorted::new();
2645        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2646        let result = unsorted.kurtosis(None, None).unwrap();
2647        // Light tails might give negative excess kurtosis
2648        assert!(result.is_finite());
2649    }
2650
2651    #[test]
2652    fn kurtosis_small_variance() {
2653        // Test with very small variance (values very close together)
2654        let mut unsorted = Unsorted::new();
2655        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2656        let result = unsorted.kurtosis(None, None);
2657        // Should still compute (variance is very small but non-zero)
2658        assert!(result.is_some());
2659        assert!(result.unwrap().is_finite());
2660    }
2661
2662    #[test]
2663    fn kurtosis_precalc_zero_variance() {
2664        // Test with pre-calculated variance of zero (should return None)
2665        let mut unsorted = Unsorted::new();
2666        unsorted.extend(vec![1, 2, 3, 4, 5]);
2667        let result = unsorted.kurtosis(None, Some(0.0));
2668        assert_eq!(result, None);
2669    }
2670
2671    #[test]
2672    fn kurtosis_precalc_negative_variance() {
2673        // Test with negative variance (invalid, but should handle gracefully)
2674        let mut unsorted = Unsorted::new();
2675        unsorted.extend(vec![1, 2, 3, 4, 5]);
2676        // Negative variance is invalid, but function should handle it
2677        let result = unsorted.kurtosis(None, Some(-1.0));
2678        // Should either return None or handle it gracefully
2679        // The function computes variance_sq = variance^2, so negative becomes positive
2680        // But this is invalid input, so behavior may vary
2681        // For now, just check it doesn't panic
2682        let _ = result;
2683    }
2684
2685    #[test]
2686    fn kurtosis_different_types() {
2687        // Test with different integer types
2688        let mut unsorted_u32 = Unsorted::new();
2689        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2690        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2691
2692        let mut unsorted_i64 = Unsorted::new();
2693        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2694        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2695
2696        assert!((result_u32 - result_i64).abs() < 1e-10);
2697    }
2698
2699    #[test]
2700    fn kurtosis_floating_point_precision() {
2701        // Test floating point precision
2702        let mut unsorted = Unsorted::new();
2703        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2704        let result = unsorted.kurtosis(None, None);
2705        assert!(result.is_some());
2706        assert!(result.unwrap().is_finite());
2707    }
2708
2709    #[test]
2710    fn kurtosis_negative_values() {
2711        // Test with negative values
2712        let mut unsorted = Unsorted::new();
2713        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2714        let result = unsorted.kurtosis(None, None);
2715        assert!(result.is_some());
2716        assert!(result.unwrap().is_finite());
2717    }
2718
2719    #[test]
2720    fn kurtosis_mixed_positive_negative() {
2721        // Test with mixed positive and negative values
2722        let mut unsorted = Unsorted::new();
2723        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2724        let result = unsorted.kurtosis(None, None);
2725        assert!(result.is_some());
2726        assert!(result.unwrap().is_finite());
2727    }
2728
2729    #[test]
2730    fn kurtosis_duplicate_values() {
2731        // Test with duplicate values (but not all same)
2732        let mut unsorted = Unsorted::new();
2733        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2734        let result = unsorted.kurtosis(None, None);
2735        assert!(result.is_some());
2736        assert!(result.unwrap().is_finite());
2737    }
2738
2739    #[test]
2740    fn kurtosis_precalc_mean_wrong() {
2741        // Test that wrong pre-calculated mean gives wrong result
2742        let mut unsorted1 = Unsorted::new();
2743        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2744        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2745
2746        let mut unsorted2 = Unsorted::new();
2747        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2748        let wrong_mean = 10.0; // Wrong mean
2749        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2750
2751        // Results should be different
2752        assert!((correct_result - wrong_result).abs() > 1e-5);
2753    }
2754
2755    #[test]
2756    fn percentile_rank_empty() {
2757        let mut unsorted: Unsorted<i32> = Unsorted::new();
2758        assert_eq!(unsorted.percentile_rank(5), None);
2759        let empty_vec: Vec<i32> = vec![];
2760        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2761    }
2762
2763    #[test]
2764    fn percentile_rank_basic() {
2765        let mut unsorted = Unsorted::new();
2766        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2767
2768        // Value less than all
2769        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2770
2771        // Value greater than all
2772        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2773
2774        // Median (5) should be around 50th percentile
2775        let rank = unsorted.percentile_rank(5).unwrap();
2776        assert!((rank - 50.0).abs() < 1.0);
2777
2778        // First value should be at 10th percentile
2779        let rank = unsorted.percentile_rank(1).unwrap();
2780        assert!((rank - 10.0).abs() < 1.0);
2781    }
2782
2783    #[test]
2784    fn percentile_rank_duplicates() {
2785        let mut unsorted = Unsorted::new();
2786        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2787
2788        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2789        let rank = unsorted.percentile_rank(2).unwrap();
2790        assert!((rank - 40.0).abs() < 1.0);
2791    }
2792
2793    #[test]
2794    fn percentile_rank_stream() {
2795        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2796        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2797    }
2798
2799    #[test]
2800    fn percentile_rank_many_ties() {
2801        // 100 copies of 5 followed by 100 copies of 10 — tests O(log n) upper bound
2802        let mut unsorted = Unsorted::new();
2803        for _ in 0..100 {
2804            unsorted.add(5u32);
2805        }
2806        for _ in 0..100 {
2807            unsorted.add(10u32);
2808        }
2809        // 100 values <= 5 out of 200
2810        let rank = unsorted.percentile_rank(5).unwrap();
2811        assert!((rank - 50.0).abs() < f64::EPSILON);
2812        // All 200 values <= 10
2813        let mut unsorted2 = Unsorted::new();
2814        for _ in 0..100 {
2815            unsorted2.add(5u32);
2816        }
2817        for _ in 0..100 {
2818            unsorted2.add(10u32);
2819        }
2820        let rank = unsorted2.percentile_rank(10).unwrap();
2821        assert!((rank - 100.0).abs() < f64::EPSILON);
2822    }
2823
2824    #[test]
2825    fn atkinson_empty() {
2826        let mut unsorted: Unsorted<i32> = Unsorted::new();
2827        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2828        let empty_vec: Vec<i32> = vec![];
2829        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2830    }
2831
2832    #[test]
2833    fn atkinson_single_element() {
2834        let mut unsorted = Unsorted::new();
2835        unsorted.add(5);
2836        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2837        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2838    }
2839
2840    #[test]
2841    fn atkinson_perfect_equality() {
2842        // All values the same - perfect equality, Atkinson = 0
2843        let mut unsorted = Unsorted::new();
2844        unsorted.extend(vec![10, 10, 10, 10, 10]);
2845        let result = unsorted.atkinson(1.0, None, None).unwrap();
2846        assert!((result - 0.0).abs() < 1e-10);
2847    }
2848
2849    #[test]
2850    fn atkinson_epsilon_zero() {
2851        // Epsilon = 0 means no inequality aversion, should return 0
2852        let mut unsorted = Unsorted::new();
2853        unsorted.extend(vec![1, 2, 3, 4, 5]);
2854        let result = unsorted.atkinson(0.0, None, None).unwrap();
2855        assert!((result - 0.0).abs() < 1e-10);
2856    }
2857
2858    #[test]
2859    fn atkinson_epsilon_one() {
2860        // Epsilon = 1 uses geometric mean
2861        let mut unsorted = Unsorted::new();
2862        unsorted.extend(vec![1, 2, 3, 4, 5]);
2863        let result = unsorted.atkinson(1.0, None, None);
2864        assert!(result.is_some());
2865    }
2866
2867    #[test]
2868    fn atkinson_epsilon_one_rejects_nan() {
2869        // NaN in the data must return None, not Some(NaN), for the fused
2870        // (epsilon=1, no precalc) fast path.
2871        let mut unsorted = Unsorted::new();
2872        unsorted.extend(vec![1.0_f64, 2.0, f64::NAN, 4.0, 5.0]);
2873        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2874    }
2875
2876    #[test]
2877    fn atkinson_negative_epsilon() {
2878        let mut unsorted = Unsorted::new();
2879        unsorted.extend(vec![1, 2, 3, 4, 5]);
2880        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2881    }
2882
2883    #[test]
2884    fn atkinson_zero_mean() {
2885        // If mean is zero, Atkinson is undefined
2886        let mut unsorted = Unsorted::new();
2887        unsorted.extend(vec![0, 0, 0, 0]);
2888        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2889    }
2890
2891    #[test]
2892    fn atkinson_stream() {
2893        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2894        assert!(result.is_some());
2895    }
2896
2897    #[test]
2898    fn atkinson_precalc_mean_geometric_sum() {
2899        // Test with pre-calculated mean and geometric_sum
2900        let mut unsorted = Unsorted::new();
2901        unsorted.extend(vec![1, 2, 3, 4, 5]);
2902
2903        // Calculate mean and geometric_sum manually
2904        let mean = 3.0f64;
2905        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2906
2907        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2908        assert!(result.is_some());
2909
2910        // Test that pre-calculated values give same result
2911        let mut unsorted2 = Unsorted::new();
2912        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2913        let result2 = unsorted2.atkinson(1.0, None, None);
2914        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2915    }
2916
2917    #[test]
2918    fn atkinson_precalc_mean_only() {
2919        // Test with pre-calculated mean only
2920        let mut unsorted = Unsorted::new();
2921        unsorted.extend(vec![1, 2, 3, 4, 5]);
2922        let mean = 3.0f64;
2923
2924        let result = unsorted.atkinson(1.0, Some(mean), None);
2925        assert!(result.is_some());
2926
2927        // Test that pre-calculated mean gives same result
2928        let mut unsorted2 = Unsorted::new();
2929        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2930        let result2 = unsorted2.atkinson(1.0, None, None);
2931        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2932    }
2933
2934    #[test]
2935    fn atkinson_precalc_geometric_sum_only() {
2936        // Test with pre-calculated geometric_sum only
2937        let mut unsorted = Unsorted::new();
2938        unsorted.extend(vec![1, 2, 3, 4, 5]);
2939        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2940
2941        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2942        assert!(result.is_some());
2943
2944        // Test that pre-calculated geometric_sum gives same result
2945        let mut unsorted2 = Unsorted::new();
2946        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2947        let result2 = unsorted2.atkinson(1.0, None, None);
2948        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2949    }
2950
2951    #[test]
2952    fn test_median_with_infinity() {
2953        let mut unsorted = Unsorted::new();
2954        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2955        assert_eq!(unsorted.median(), Some(2.0));
2956    }
2957
2958    #[test]
2959    fn test_median_with_neg_infinity() {
2960        let mut unsorted = Unsorted::new();
2961        unsorted.extend(vec![f64::NEG_INFINITY, 1.0f64, 2.0]);
2962        assert_eq!(unsorted.median(), Some(1.0));
2963    }
2964
2965    #[test]
2966    fn test_quartiles_with_infinity() {
2967        let mut unsorted = Unsorted::new();
2968        unsorted.extend(vec![f64::NEG_INFINITY, 1.0, 2.0, 3.0, f64::INFINITY]);
2969        let q = unsorted.quartiles();
2970        // Q2 (median) should be 2.0
2971        assert!(q.is_some());
2972        let (_, q2, _) = q.unwrap();
2973        assert_eq!(q2, 2.0);
2974    }
2975
2976    #[test]
2977    fn test_mode_with_nan() {
2978        // NaN breaks the Ord contract via Partial<T>, so sort order is
2979        // non-deterministic. We only verify the call doesn't panic —
2980        // the exact mode value depends on where NaN lands after sorting.
2981        let mut unsorted: Unsorted<f64> = Unsorted::new();
2982        unsorted.extend(vec![1.0, f64::NAN, 2.0, 2.0, 3.0]);
2983        let _result = unsorted.mode(); // must not panic
2984    }
2985
2986    #[test]
2987    fn test_gini_with_infinity() {
2988        let mut unsorted = Unsorted::new();
2989        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2990        let g = unsorted.gini(None);
2991        // Gini with infinity in the data: the weighted_sum/sum ratio involves
2992        // Inf/Inf which is NaN, so the result is Some(NaN) — not a meaningful
2993        // Gini coefficient, but importantly does not panic
2994        assert!(g.unwrap().is_nan());
2995    }
2996
2997    #[test]
2998    fn test_cardinality_with_infinity() {
2999        let mut unsorted = Unsorted::new();
3000        unsorted.extend(vec![1.0f64, f64::INFINITY, f64::NEG_INFINITY, 1.0]);
3001        assert_eq!(unsorted.cardinality(false, 10_000), 3);
3002    }
3003}
3004
3005#[cfg(test)]
3006mod bench {
3007    use super::*;
3008    use std::time::Instant;
3009
3010    #[test]
3011    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
3012    fn comprehensive_quartiles_benchmark() {
3013        // Test a wide range of data sizes
3014        let data_sizes = vec![
3015            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
3016        ];
3017
3018        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
3019
3020        for size in data_sizes {
3021            println!("--- Testing with {} elements ---", size);
3022
3023            // Test different data patterns
3024            let test_patterns = vec![
3025                ("Random", generate_random_data(size)),
3026                ("Reverse Sorted", {
3027                    let mut v = Vec::with_capacity(size);
3028                    for x in (0..size).rev() {
3029                        v.push(x as i32);
3030                    }
3031                    v
3032                }),
3033                ("Already Sorted", {
3034                    let mut v = Vec::with_capacity(size);
3035                    for x in 0..size {
3036                        v.push(x as i32);
3037                    }
3038                    v
3039                }),
3040                ("Many Duplicates", {
3041                    // Create a vector with just a few distinct values repeated many times
3042                    let mut v = Vec::with_capacity(size);
3043                    let chunk_size = size / 100;
3044                    for i in 0..100 {
3045                        v.extend(std::iter::repeat_n(i, chunk_size));
3046                    }
3047                    // Add any remaining elements
3048                    v.extend(std::iter::repeat_n(0, size - v.len()));
3049                    v
3050                }),
3051            ];
3052
3053            for (pattern_name, test_data) in test_patterns {
3054                println!("\n  Pattern: {}", pattern_name);
3055
3056                // Benchmark sorting-based approach
3057                let mut unsorted1 = Unsorted::new();
3058                unsorted1.extend(test_data.clone());
3059
3060                let start = Instant::now();
3061                let result_sorted = unsorted1.quartiles();
3062                let sorted_time = start.elapsed();
3063
3064                // Benchmark selection-based approach (with copying)
3065                let mut unsorted2 = Unsorted::new();
3066                unsorted2.extend(test_data.clone());
3067
3068                let start = Instant::now();
3069                let result_selection = unsorted2.quartiles_with_selection();
3070                let selection_time = start.elapsed();
3071
3072                // Benchmark zero-copy selection-based approach
3073                let mut unsorted3 = Unsorted::new();
3074                unsorted3.extend(test_data);
3075
3076                let start = Instant::now();
3077                let result_zero_copy = unsorted3.quartiles_zero_copy();
3078                let zero_copy_time = start.elapsed();
3079
3080                // Verify results are the same
3081                assert_eq!(result_sorted, result_selection);
3082                assert_eq!(result_sorted, result_zero_copy);
3083
3084                let selection_speedup =
3085                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3086                let zero_copy_speedup =
3087                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3088
3089                println!("    Sorting:       {:>12?}", sorted_time);
3090                println!(
3091                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3092                    selection_time, selection_speedup
3093                );
3094                println!(
3095                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3096                    zero_copy_time, zero_copy_speedup
3097                );
3098
3099                let best_algorithm =
3100                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3101                        "ZERO-COPY"
3102                    } else if selection_speedup > 1.0 {
3103                        "SELECTION"
3104                    } else {
3105                        "SORTING"
3106                    };
3107                println!("    Best: {}", best_algorithm);
3108            }
3109
3110            println!(); // Add blank line between sizes
3111        }
3112    }
3113
3114    // Generate random data for benchmarking
3115    fn generate_random_data(size: usize) -> Vec<i32> {
3116        // Simple LCG random number generator for reproducible results
3117        let mut rng = 1234567u64;
3118        let mut vec = Vec::with_capacity(size);
3119        for _ in 0..size {
3120            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3121            vec.push((rng >> 16) as i32);
3122        }
3123        vec
3124    }
3125
3126    #[test]
3127    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3128    fn find_selection_threshold() {
3129        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3130
3131        // Binary search approach to find the threshold
3132        let mut found_threshold = None;
3133        let test_sizes = vec![
3134            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3135            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3136        ];
3137
3138        for size in test_sizes {
3139            println!("Testing size: {}", size);
3140
3141            // Use random data as it's most representative of real-world scenarios
3142            let test_data = generate_random_data(size);
3143
3144            // Run multiple iterations to get average performance
3145            let iterations = 3;
3146            let mut sorting_total = 0u128;
3147            let mut selection_total = 0u128;
3148            let mut zero_copy_total = 0u128;
3149
3150            for i in 0..iterations {
3151                println!("  Iteration {}/{}", i + 1, iterations);
3152
3153                // Sorting approach
3154                let mut unsorted1 = Unsorted::new();
3155                unsorted1.extend(test_data.clone());
3156
3157                let start = Instant::now();
3158                let _result_sorted = unsorted1.quartiles();
3159                sorting_total += start.elapsed().as_nanos();
3160
3161                // Selection approach (with copying)
3162                let mut unsorted2 = Unsorted::new();
3163                unsorted2.extend(test_data.clone());
3164
3165                let start = Instant::now();
3166                let _result_selection = unsorted2.quartiles_with_selection();
3167                selection_total += start.elapsed().as_nanos();
3168
3169                // Zero-copy selection approach
3170                let mut unsorted3 = Unsorted::new();
3171                unsorted3.extend(test_data.clone());
3172
3173                let start = Instant::now();
3174                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3175                zero_copy_total += start.elapsed().as_nanos();
3176            }
3177
3178            let avg_sorting = sorting_total / iterations as u128;
3179            let avg_selection = selection_total / iterations as u128;
3180            let avg_zero_copy = zero_copy_total / iterations as u128;
3181            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3182            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3183
3184            println!(
3185                "  Average sorting:    {:>12.2}ms",
3186                avg_sorting as f64 / 1_000_000.0
3187            );
3188            println!(
3189                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3190                avg_selection as f64 / 1_000_000.0,
3191                selection_speedup
3192            );
3193            println!(
3194                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3195                avg_zero_copy as f64 / 1_000_000.0,
3196                zero_copy_speedup
3197            );
3198
3199            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3200                found_threshold = Some(size);
3201                let best_method = if zero_copy_speedup > selection_speedup {
3202                    "Zero-copy"
3203                } else {
3204                    "Selection"
3205                };
3206                println!(
3207                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3208                    best_method, size
3209                );
3210            }
3211
3212            println!();
3213        }
3214
3215        match found_threshold {
3216            Some(threshold) => println!(
3217                "🎯 Selection algorithm becomes faster at approximately {} elements",
3218                threshold
3219            ),
3220            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3221        }
3222    }
3223
3224    #[test]
3225    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3226    fn benchmark_different_data_types() {
3227        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3228
3229        let size = 5_000_000; // Use a large size where differences might be visible
3230
3231        // Test with f64 (floating point)
3232        println!("Testing with f64 data:");
3233        let float_data: Vec<f64> = generate_random_data(size)
3234            .into_iter()
3235            .map(|x| x as f64 / 1000.0)
3236            .collect();
3237
3238        let mut unsorted1 = Unsorted::new();
3239        unsorted1.extend(float_data.clone());
3240        let start = Instant::now();
3241        let _result = unsorted1.quartiles();
3242        let sorting_time = start.elapsed();
3243
3244        let mut unsorted2 = Unsorted::new();
3245        unsorted2.extend(float_data.clone());
3246        let start = Instant::now();
3247        let _result = unsorted2.quartiles_with_selection();
3248        let selection_time = start.elapsed();
3249
3250        let mut unsorted3 = Unsorted::new();
3251        unsorted3.extend(float_data);
3252        let start = Instant::now();
3253        let _result = unsorted3.quartiles_zero_copy();
3254        let zero_copy_time = start.elapsed();
3255
3256        println!("  Sorting:    {:?}", sorting_time);
3257        println!("  Selection:  {:?}", selection_time);
3258        println!("  Zero-copy:  {:?}", zero_copy_time);
3259        println!(
3260            "  Selection Speedup:  {:.2}x",
3261            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3262        );
3263        println!(
3264            "  Zero-copy Speedup:  {:.2}x\n",
3265            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3266        );
3267
3268        // Test with i64 (larger integers)
3269        println!("Testing with i64 data:");
3270        let int64_data: Vec<i64> = generate_random_data(size)
3271            .into_iter()
3272            .map(|x| x as i64 * 1000)
3273            .collect();
3274
3275        let mut unsorted1 = Unsorted::new();
3276        unsorted1.extend(int64_data.clone());
3277        let start = Instant::now();
3278        let _result = unsorted1.quartiles();
3279        let sorting_time = start.elapsed();
3280
3281        let mut unsorted2 = Unsorted::new();
3282        unsorted2.extend(int64_data.clone());
3283        let start = Instant::now();
3284        let _result = unsorted2.quartiles_with_selection();
3285        let selection_time = start.elapsed();
3286
3287        let mut unsorted3 = Unsorted::new();
3288        unsorted3.extend(int64_data);
3289        let start = Instant::now();
3290        let _result = unsorted3.quartiles_zero_copy();
3291        let zero_copy_time = start.elapsed();
3292
3293        println!("  Sorting:    {:?}", sorting_time);
3294        println!("  Selection:  {:?}", selection_time);
3295        println!("  Zero-copy:  {:?}", zero_copy_time);
3296        println!(
3297            "  Selection Speedup:  {:.2}x",
3298            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3299        );
3300        println!(
3301            "  Zero-copy Speedup:  {:.2}x",
3302            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3303        );
3304    }
3305}
stats/unsorted.rs

stats/
unsorted.rs