Skip to main content

stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10// PARALLEL_THRESHOLD (10,000) is the minimum dataset size for rayon parallel sort.
11// The separate 10,240 threshold in cardinality estimation (5 × 2,048) is aligned to
12// cache-line-friendly chunk sizes for parallel iterator reduction.
13const PARALLEL_THRESHOLD: usize = 10_000;
14
15/// Compute the exact median on a stream of data.
16///
17/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
18#[inline]
19pub fn median<I>(it: I) -> Option<f64>
20where
21    I: Iterator,
22    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
23{
24    it.collect::<Unsorted<_>>().median()
25}
26
27/// Compute the median absolute deviation (MAD) on a stream of data.
28#[inline]
29pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
30where
31    I: Iterator,
32    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
33{
34    it.collect::<Unsorted<_>>().mad(precalc_median)
35}
36
37/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
38///
39/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
40#[inline]
41pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
42where
43    I: Iterator,
44    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
45{
46    it.collect::<Unsorted<_>>().quartiles()
47}
48
49/// Compute the exact mode on a stream of data.
50///
51/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
52///
53/// If the data does not have a mode, then `None` is returned.
54#[inline]
55pub fn mode<T, I>(it: I) -> Option<T>
56where
57    T: PartialOrd + Clone + Send,
58    I: Iterator<Item = T>,
59{
60    it.collect::<Unsorted<T>>().mode()
61}
62
63/// Compute the modes on a stream of data.
64///
65/// If there is a single mode, then only that value is returned in the `Vec`
66/// however, if there are multiple values tied for occurring the most amount of times
67/// those values are returned.
68///
69/// ## Example
70/// ```
71/// use stats;
72///
73/// let vals = vec![1, 1, 2, 2, 3];
74///
75/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
76/// ```
77/// This has time complexity `O(n)`
78///
79/// If the data does not have a mode, then an empty `Vec` is returned.
80#[inline]
81pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
82where
83    T: PartialOrd + Clone + Send,
84    I: Iterator<Item = T>,
85{
86    it.collect::<Unsorted<T>>().modes()
87}
88
89/// Compute the antimodes on a stream of data.
90///
91/// Antimode is the least frequent non-zero score.
92///
93/// If there is a single antimode, then only that value is returned in the `Vec`
94/// however, if there are multiple values tied for occurring the least amount of times
95/// those values are returned.
96///
97/// Only the first 10 antimodes are returned to prevent returning the whole set
98/// when cardinality = number of records (i.e. all unique values).
99///
100/// ## Example
101/// ```
102/// use stats;
103///
104/// let vals = vec![1, 1, 2, 2, 3];
105///
106/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
107/// ```
108/// This has time complexity `O(n)`
109///
110/// If the data does not have an antimode, then an empty `Vec` is returned.
111#[inline]
112pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
113where
114    T: PartialOrd + Clone + Send,
115    I: Iterator<Item = T>,
116{
117    let (antimodes_result, antimodes_count, antimodes_occurrences) =
118        it.collect::<Unsorted<T>>().antimodes();
119    (antimodes_result, antimodes_count, antimodes_occurrences)
120}
121
122/// Compute the Gini Coefficient on a stream of data.
123///
124/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
125/// to 1 (perfect inequality).
126///
127/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
128#[inline]
129pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
130where
131    I: Iterator,
132    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
133{
134    it.collect::<Unsorted<_>>().gini(precalc_sum)
135}
136
137/// Compute the kurtosis (excess kurtosis) on a stream of data.
138///
139/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
140/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
141/// negative values indicate light tails.
142///
143/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
144#[inline]
145pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
146where
147    I: Iterator,
148    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
149{
150    it.collect::<Unsorted<_>>()
151        .kurtosis(precalc_mean, precalc_variance)
152}
153
154/// Compute the percentile rank of a value on a stream of data.
155///
156/// Returns the percentile rank (0-100) of the given value in the distribution.
157/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
158///
159/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
160#[inline]
161pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
162where
163    I: Iterator,
164    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
165    V: PartialOrd + ToPrimitive,
166{
167    it.collect::<Unsorted<_>>().percentile_rank(value)
168}
169
170/// Compute the Atkinson Index on a stream of data.
171///
172/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
173/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
174/// Higher ε values give more weight to inequality at the lower end of the distribution.
175///
176/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
177#[inline]
178pub fn atkinson<I>(
179    it: I,
180    epsilon: f64,
181    precalc_mean: Option<f64>,
182    precalc_geometric_sum: Option<f64>,
183) -> Option<f64>
184where
185    I: Iterator,
186    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
187{
188    it.collect::<Unsorted<_>>()
189        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
190}
191
192fn median_on_sorted<T>(data: &[T]) -> Option<f64>
193where
194    T: PartialOrd + ToPrimitive,
195{
196    Some(match data.len() {
197        // Empty slice case - return None early
198        0 => {
199            core::hint::cold_path();
200            return None;
201        }
202        // Single element case - return that element converted to f64
203        1 => data.first()?.to_f64()?,
204        // Even length case - average the two middle elements
205        len if len.is_multiple_of(2) => {
206            let idx = len / 2;
207            // Safety: we know these indices are valid because we checked len is even and non-zero,
208            // so idx-1 and idx are valid indices into data
209            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
210            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
211            f64::midpoint(v1, v2)
212        }
213        // Odd length case - return the middle element
214        // Safety: we know the index is within bounds
215        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
216    })
217}
218
219fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
220where
221    T: Sync + PartialOrd + ToPrimitive,
222{
223    if data.is_empty() {
224        core::hint::cold_path();
225        return None;
226    }
227    // SAFETY: median_on_sorted returns None only when data is empty or when
228    // to_f64() returns None. Emptiness is checked above with cold_path(),
229    // and to_f64() is treated as infallible for the supported numeric types
230    // throughout this module (see unwrap_unchecked usages below).
231    let median_obs =
232        precalc_median.unwrap_or_else(|| unsafe { median_on_sorted(data).unwrap_unchecked() });
233
234    // Use adaptive parallel processing based on data size
235    let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
236        // Sequential processing for small datasets
237        // Iterator collect enables TrustedLen optimization, eliminating per-element bounds checks
238        data.iter()
239            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
240            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
241            .collect()
242    } else {
243        // Parallel processing for large datasets
244        data.par_iter()
245            // SAFETY: to_f64() always returns Some for standard numeric types
246            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
247            .collect()
248    };
249
250    // Use select_nth_unstable to find the median in O(n) instead of O(n log n) full sort
251    let len = abs_diff_vec.len();
252    let mid = len / 2;
253    let cmp = |a: &f64, b: &f64| a.total_cmp(b);
254
255    abs_diff_vec.select_nth_unstable_by(mid, cmp);
256
257    if len.is_multiple_of(2) {
258        // Even length: need both mid-1 and mid elements
259        let right = abs_diff_vec[mid];
260        // The left partition [0..mid] contains elements <= abs_diff_vec[mid],
261        // so we can find the max of the left partition for mid-1
262        let left = abs_diff_vec[..mid]
263            .iter()
264            .max_by(|a, b| cmp(a, b))
265            .copied()?;
266        Some(f64::midpoint(left, right))
267    } else {
268        Some(abs_diff_vec[mid])
269    }
270}
271
272fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
273where
274    T: Sync + PartialOrd + ToPrimitive,
275{
276    let len = data.len();
277
278    // Early return for empty data
279    if len == 0 {
280        core::hint::cold_path();
281        return None;
282    }
283
284    // Single element case: perfect equality, Gini = 0
285    if len == 1 {
286        core::hint::cold_path();
287        return Some(0.0);
288    }
289
290    // Gini coefficient is only defined for non-negative distributions.
291    // Since data is sorted, check the first (smallest) element.
292    // SAFETY: len > 1 guaranteed by checks above
293    let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
294    if first_val < 0.0 {
295        core::hint::cold_path();
296        return None;
297    }
298
299    // Compute sum and weighted sum.
300    // When precalc_sum is provided, only compute weighted_sum in a single pass.
301    // When not provided, fuse both computations into a single pass over the data
302    // to halve cache pressure (following the fold/reduce pattern used in kurtosis).
303    let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
304        if precalc < 0.0 {
305            core::hint::cold_path();
306            return None;
307        }
308        // Only need weighted_sum — single pass
309        let weighted_sum = if len < PARALLEL_THRESHOLD {
310            let mut weighted_sum = 0.0;
311            for (i, x) in data.iter().enumerate() {
312                // SAFETY: to_f64() always returns Some for standard numeric types
313                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
314                weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
315            }
316            weighted_sum
317        } else {
318            data.par_iter()
319                .enumerate()
320                .map(|(i, x)| {
321                    // SAFETY: to_f64() always returns Some for standard numeric types
322                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
323                    (i + 1) as f64 * val
324                })
325                .sum()
326        };
327        (precalc, weighted_sum)
328    } else if len < PARALLEL_THRESHOLD {
329        // Fused single pass: compute both sum and weighted_sum together
330        let mut sum = 0.0;
331        let mut weighted_sum = 0.0;
332        for (i, x) in data.iter().enumerate() {
333            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
334            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
335            sum += val;
336            weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
337        }
338        (sum, weighted_sum)
339    } else {
340        // Fused parallel single pass using fold/reduce
341        data.par_iter()
342            .enumerate()
343            .fold(
344                || (0.0_f64, 0.0_f64),
345                |acc, (i, x)| {
346                    // SAFETY: to_f64() always returns Some for standard numeric types
347                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
348                    (acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
349                },
350            )
351            .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
352    };
353
354    // If sum is zero, Gini is undefined
355    if sum == 0.0 {
356        core::hint::cold_path();
357        return None;
358    }
359
360    // Compute Gini coefficient using the formula:
361    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
362    // where i is 1-indexed rank and y_i are sorted values
363    let n = len as f64;
364    let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
365
366    Some(gini)
367}
368
369fn kurtosis_on_sorted<T>(
370    data: &[Partial<T>],
371    precalc_mean: Option<f64>,
372    precalc_variance: Option<f64>,
373) -> Option<f64>
374where
375    T: Sync + PartialOrd + ToPrimitive,
376{
377    let len = data.len();
378
379    // Need at least 4 elements for meaningful kurtosis
380    if len < 4 {
381        core::hint::cold_path();
382        return None;
383    }
384
385    // Use pre-calculated mean if provided, otherwise compute it
386    let mean = precalc_mean.unwrap_or_else(|| {
387        let sum: f64 = if len < PARALLEL_THRESHOLD {
388            // Iterator sum enables auto-vectorization (SIMD) by the compiler
389            data.iter()
390                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
391                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
392                .sum()
393        } else {
394            data.par_iter()
395                // SAFETY: to_f64() always returns Some for standard numeric types
396                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
397                .sum()
398        };
399        sum / len as f64
400    });
401
402    // Compute variance_sq and fourth_power_sum
403    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
404    // Otherwise, we need to compute variance from the data
405    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
406        // Negative variance is invalid (possible floating-point rounding artifact)
407        if variance < 0.0 {
408            core::hint::cold_path();
409            return None;
410        }
411        // Use pre-calculated variance: variance_sq = variance^2
412        let variance_sq = variance * variance;
413
414        // Still need to compute fourth_power_sum
415        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
416            let mut sum = 0.0;
417            for x in data {
418                // SAFETY: to_f64() always returns Some for standard numeric types
419                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
420                let diff = val - mean;
421                let diff_sq = diff * diff;
422                sum = diff_sq.mul_add(diff_sq, sum);
423            }
424            sum
425        } else {
426            data.par_iter()
427                .map(|x| {
428                    // SAFETY: to_f64() always returns Some for standard numeric types
429                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
430                    let diff = val - mean;
431                    let diff_sq = diff * diff;
432                    diff_sq * diff_sq
433                })
434                .sum()
435        };
436
437        (variance_sq, fourth_power_sum)
438    } else {
439        // Compute both variance_sum and fourth_power_sum
440        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
441            let mut variance_sum = 0.0;
442            let mut fourth_power_sum = 0.0;
443
444            for x in data {
445                // SAFETY: to_f64() always returns Some for standard numeric types
446                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
447                let diff = val - mean;
448                let diff_sq = diff * diff;
449                variance_sum += diff_sq;
450                fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
451            }
452
453            (variance_sum, fourth_power_sum)
454        } else {
455            // Single pass computing both sums simultaneously
456            data.par_iter()
457                .fold(
458                    || (0.0_f64, 0.0_f64),
459                    |acc, x| {
460                        // SAFETY: to_f64() always returns Some for standard numeric types
461                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
462                        let diff = val - mean;
463                        let diff_sq = diff * diff;
464                        (acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
465                    },
466                )
467                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
468        };
469
470        let variance = variance_sum / len as f64;
471
472        // If variance is zero, all values are the same, kurtosis is undefined
473        if variance == 0.0 {
474            core::hint::cold_path();
475            return None;
476        }
477
478        let variance_sq = variance * variance;
479        (variance_sq, fourth_power_sum)
480    };
481
482    // If variance_sq is zero, all values are the same, kurtosis is undefined
483    if variance_sq == 0.0 {
484        core::hint::cold_path();
485        return None;
486    }
487
488    let n = len as f64;
489
490    // Sample excess kurtosis formula:
491    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
492    let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
493    let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
494    let kurtosis =
495        (n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
496
497    Some(kurtosis)
498}
499
500fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
501where
502    T: PartialOrd + ToPrimitive,
503    V: PartialOrd + ToPrimitive,
504{
505    let len = data.len();
506
507    if len == 0 {
508        core::hint::cold_path();
509        return None;
510    }
511
512    let value_f64 = value.to_f64()?;
513
514    // Binary search to find the position where value would be inserted
515    // This gives us the number of values <= value
516    let count_leq = data.binary_search_by(|x| {
517        x.0.to_f64()
518            .unwrap_or(f64::NAN)
519            .partial_cmp(&value_f64)
520            .unwrap_or(std::cmp::Ordering::Less)
521    });
522
523    let count = match count_leq {
524        Ok(idx) => {
525            // Value found at idx — use partition_point (O(log n)) to find the upper bound
526            // of equal values instead of a linear scan
527            let upper = data[idx + 1..].partition_point(|x| {
528                x.0.to_f64()
529                    .is_some_and(|v| v.total_cmp(&value_f64).is_le())
530            });
531            idx + 1 + upper
532        }
533        Err(idx) => idx, // Number of values less than value
534    };
535
536    // Percentile rank = (count / n) * 100
537    Some((count as f64 / len as f64) * 100.0)
538}
539
540fn atkinson_on_sorted<T>(
541    data: &[Partial<T>],
542    epsilon: f64,
543    precalc_mean: Option<f64>,
544    precalc_geometric_sum: Option<f64>,
545) -> Option<f64>
546where
547    T: Sync + PartialOrd + ToPrimitive,
548{
549    let len = data.len();
550
551    // Early return for empty data
552    if len == 0 {
553        core::hint::cold_path();
554        return None;
555    }
556
557    // Single element case: perfect equality, Atkinson = 0
558    if len == 1 {
559        core::hint::cold_path();
560        return Some(0.0);
561    }
562
563    // Epsilon must be non-negative
564    if epsilon < 0.0 {
565        core::hint::cold_path();
566        return None;
567    }
568
569    let epsilon_is_one = (epsilon - 1.0).abs() < 1e-10;
570
571    // Fused fast path: epsilon=1 with no precalc — compute sum (for mean) and
572    // ln_sum (for geometric_sum) in a single pass over data, halving memory bandwidth.
573    if epsilon_is_one && precalc_mean.is_none() && precalc_geometric_sum.is_none() {
574        // Explicit NaN check plus `v <= 0.0` rejects NaN, zero, negatives, and
575        // -infinity. The non-fused epsilon=1 branch rejects NaN via a post-sum
576        // `is_nan()` check; handling it per-element here is equivalent.
577        let (sum, ln_sum, any_invalid) = if len < PARALLEL_THRESHOLD {
578            let mut s = 0.0f64;
579            let mut ls = 0.0f64;
580            let mut bad = false;
581            for x in data {
582                // SAFETY: to_f64() always returns Some for standard numeric types
583                let v = unsafe { x.0.to_f64().unwrap_unchecked() };
584                if v.is_nan() || v <= 0.0 {
585                    bad = true;
586                } else {
587                    s += v;
588                    ls += v.ln();
589                }
590            }
591            (s, ls, bad)
592        } else {
593            data.par_iter()
594                .fold(
595                    || (0.0f64, 0.0f64, false),
596                    |(s, ls, bad), x| {
597                        // SAFETY: to_f64() always returns Some for standard numeric types
598                        let v = unsafe { x.0.to_f64().unwrap_unchecked() };
599                        if v.is_nan() || v <= 0.0 {
600                            (s, ls, true)
601                        } else {
602                            (s + v, ls + v.ln(), bad)
603                        }
604                    },
605                )
606                .reduce(
607                    || (0.0, 0.0, false),
608                    |a, b| (a.0 + b.0, a.1 + b.1, a.2 || b.2),
609                )
610        };
611        if any_invalid {
612            core::hint::cold_path();
613            return None;
614        }
615        let mean = sum / len as f64;
616        if mean == 0.0 {
617            core::hint::cold_path();
618            return None;
619        }
620        let geometric_mean = (ln_sum / len as f64).exp();
621        return Some(1.0 - geometric_mean / mean);
622    }
623
624    // Use pre-calculated mean if provided, otherwise compute it
625    let mean = precalc_mean.unwrap_or_else(|| {
626        let sum: f64 = if len < PARALLEL_THRESHOLD {
627            // Iterator sum enables auto-vectorization (SIMD) by the compiler
628            data.iter()
629                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
630                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
631                .sum()
632        } else {
633            data.par_iter()
634                // SAFETY: to_f64() always returns Some for standard numeric types
635                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
636                .sum()
637        };
638        sum / len as f64
639    });
640
641    // If mean is zero, Atkinson is undefined
642    if mean == 0.0 {
643        core::hint::cold_path();
644        return None;
645    }
646
647    // Handle special case: epsilon = 1 (uses geometric mean).
648    // Reached only when precalc_mean and/or precalc_geometric_sum was supplied;
649    // the fully-unsupplied case is handled by the fused fast path above.
650    if epsilon_is_one {
651        // A_1 = 1 - (geometric_mean / mean)
652        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
653            precalc
654        } else if len < PARALLEL_THRESHOLD {
655            let mut sum = 0.0;
656            for x in data {
657                // SAFETY: to_f64() always returns Some for standard numeric types
658                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
659                if val <= 0.0 {
660                    // Geometric mean undefined for non-positive values
661                    return None;
662                }
663                sum += val.ln();
664            }
665            sum
666        } else {
667            data.par_iter()
668                .map(|x| {
669                    // SAFETY: to_f64() always returns Some for standard numeric types
670                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
671                    if val <= 0.0 {
672                        return f64::NAN;
673                    }
674                    val.ln()
675                })
676                .sum()
677        };
678
679        if geometric_sum.is_nan() {
680            core::hint::cold_path();
681            return None;
682        }
683
684        let geometric_mean = (geometric_sum / len as f64).exp();
685        return Some(1.0 - geometric_mean / mean);
686    }
687
688    // General case: epsilon != 1
689    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
690    let exponent = 1.0 - epsilon;
691    // Hoist reciprocal: replace per-element division with multiplication in the hot loop.
692    let inv_mean = mean.recip();
693
694    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
695        let mut sum = 0.0;
696        for x in data {
697            // SAFETY: to_f64() always returns Some for standard numeric types
698            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
699            if val < 0.0 {
700                // Negative values with non-integer exponent are undefined
701                return None;
702            }
703            let ratio = val * inv_mean;
704            sum += ratio.powf(exponent);
705        }
706        sum
707    } else {
708        data.par_iter()
709            .map(|x| {
710                // SAFETY: to_f64() always returns Some for standard numeric types
711                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
712                if val < 0.0 {
713                    return f64::NAN;
714                }
715                let ratio = val * inv_mean;
716                ratio.powf(exponent)
717            })
718            .sum()
719    };
720
721    if sum_powered.is_nan() || sum_powered <= 0.0 {
722        core::hint::cold_path();
723        return None;
724    }
725
726    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
727    Some(atkinson)
728}
729
730/// Selection algorithm to find the k-th smallest element in O(n) average time.
731/// This is an implementation of quickselect algorithm.
732#[cfg(test)]
733fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
734where
735    T: PartialOrd,
736{
737    if data.is_empty() || k >= data.len() {
738        core::hint::cold_path();
739        return None;
740    }
741
742    let mut left = 0;
743    let mut right = data.len() - 1;
744
745    loop {
746        if left == right {
747            return Some(&data[left].0);
748        }
749
750        // Use median-of-three pivot selection for better performance
751        let pivot_idx = median_of_three_pivot(data, left, right);
752        let pivot_idx = partition(data, left, right, pivot_idx);
753
754        match k.cmp(&pivot_idx) {
755            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
756            std::cmp::Ordering::Less => right = pivot_idx - 1,
757            std::cmp::Ordering::Greater => left = pivot_idx + 1,
758        }
759    }
760}
761
762/// Select the median of three elements as pivot for better quickselect performance
763#[cfg(test)]
764fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
765where
766    T: PartialOrd,
767{
768    let mid = left + (right - left) / 2;
769
770    if data[left] <= data[mid] {
771        if data[mid] <= data[right] {
772            mid
773        } else if data[left] <= data[right] {
774            right
775        } else {
776            left
777        }
778    } else if data[left] <= data[right] {
779        left
780    } else if data[mid] <= data[right] {
781        right
782    } else {
783        mid
784    }
785}
786
787/// Partition function for quickselect
788#[cfg(test)]
789fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
790where
791    T: PartialOrd,
792{
793    // Move pivot to end
794    data.swap(pivot_idx, right);
795    let mut store_idx = left;
796
797    // Move all elements smaller than pivot to the left
798    // Cache pivot position for better cache locality (access data[right] directly each time)
799    for i in left..right {
800        // Safety: i, store_idx, and right are guaranteed to be in bounds
801        // Compare directly with pivot at data[right] - compiler should optimize this access
802        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
803            data.swap(i, store_idx);
804            store_idx += 1;
805        }
806    }
807
808    // Move pivot to its final place
809    data.swap(store_idx, right);
810    store_idx
811}
812
813// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
814// It divides data into quarters based on the length n = 4k + r where r is remainder.
815// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
816fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
817where
818    T: PartialOrd + ToPrimitive,
819{
820    let len = data.len();
821
822    // Early return for small arrays
823    match len {
824        0..=2 => {
825            core::hint::cold_path();
826            return None;
827        }
828        3 => {
829            return Some(
830                // SAFETY: We know these indices are valid because len == 3
831                unsafe {
832                    (
833                        data.get_unchecked(0).0.to_f64()?,
834                        data.get_unchecked(1).0.to_f64()?,
835                        data.get_unchecked(2).0.to_f64()?,
836                    )
837                },
838            );
839        }
840        _ => {}
841    }
842
843    // Calculate k and remainder in one division
844    let k = len / 4;
845    let remainder = len % 4;
846
847    // SAFETY: All index calculations below are guaranteed to be in bounds
848    // because we've verified len >= 4 above and k is len/4
849    unsafe {
850        Some(match remainder {
851            0 => {
852                // Let data = {x_i}_{i=0..4k} where k is positive integer.
853                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
854                // If we divide data into two parts {x_i < q2} as L and
855                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
856                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
857                // =============
858                // Simply put: Length is multiple of 4 (4k)
859                // q1 = (x_{k-1} + x_k) / 2
860                // q2 = (x_{2k-1} + x_{2k}) / 2
861                // q3 = (x_{3k-1} + x_{3k}) / 2
862                let q1 = f64::midpoint(
863                    data.get_unchecked(k - 1).0.to_f64()?,
864                    data.get_unchecked(k).0.to_f64()?,
865                );
866                let q2 = f64::midpoint(
867                    data.get_unchecked(2 * k - 1).0.to_f64()?,
868                    data.get_unchecked(2 * k).0.to_f64()?,
869                );
870                let q3 = f64::midpoint(
871                    data.get_unchecked(3 * k - 1).0.to_f64()?,
872                    data.get_unchecked(3 * k).0.to_f64()?,
873                );
874                (q1, q2, q3)
875            }
876            1 => {
877                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
878                // Median q2 = x_{2k}.
879                // If we divide data other than q2 into two parts {x_i < q2}
880                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
881                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
882                // =============
883                // Simply put: Length is 4k + 1
884                // q1 = (x_{k-1} + x_k) / 2
885                // q2 = x_{2k}
886                // q3 = (x_{3k} + x_{3k+1}) / 2
887                let q1 = f64::midpoint(
888                    data.get_unchecked(k - 1).0.to_f64()?,
889                    data.get_unchecked(k).0.to_f64()?,
890                );
891                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
892                let q3 = f64::midpoint(
893                    data.get_unchecked(3 * k).0.to_f64()?,
894                    data.get_unchecked(3 * k + 1).0.to_f64()?,
895                );
896                (q1, q2, q3)
897            }
898            2 => {
899                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
900                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
901                // If we divide data into two parts {x_i < q2} as L and
902                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
903                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
904                // =============
905                // Simply put: Length is 4k + 2
906                // q1 = x_k
907                // q2 = (x_{2k} + x_{2k+1}) / 2
908                // q3 = x_{3k+1}
909                let q1 = data.get_unchecked(k).0.to_f64()?;
910                let q2 = f64::midpoint(
911                    data.get_unchecked(2 * k).0.to_f64()?,
912                    data.get_unchecked(2 * k + 1).0.to_f64()?,
913                );
914                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
915                (q1, q2, q3)
916            }
917            _ => {
918                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
919                // Median q2 = x_{2k+1}.
920                // If we divide data other than q2 into two parts {x_i < q2}
921                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
922                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
923                // =============
924                // Simply put: Length is 4k + 3
925                // q1 = x_k
926                // q2 = x_{2k+1}
927                // q3 = x_{3k+2}
928                let q1 = data.get_unchecked(k).0.to_f64()?;
929                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
930                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
931                (q1, q2, q3)
932            }
933        })
934    }
935}
936
937/// Zero-copy quartiles computation using index-based selection.
938/// This avoids copying data by working with an array of indices.
939///
940/// Uses `select_nth_unstable_by` on the indices array, which partitions in-place.
941/// After selecting position p, elements at indices [0..p] are <= the p-th element
942/// and elements at [p+1..] are >= it. By selecting positions in ascending order,
943/// each subsequent selection only needs to search within the right partition,
944/// avoiding redundant O(n) resets.
945fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
946where
947    T: PartialOrd + ToPrimitive,
948{
949    let len = data.len();
950
951    // Early return for small arrays
952    match len {
953        0..=2 => {
954            core::hint::cold_path();
955            return None;
956        }
957        3 => {
958            let mut indices: Vec<usize> = (0..3).collect();
959            let cmp = |a: &usize, b: &usize| {
960                data[*a]
961                    .partial_cmp(&data[*b])
962                    .unwrap_or(std::cmp::Ordering::Less)
963            };
964            indices.sort_unstable_by(cmp);
965            let min_val = data[indices[0]].0.to_f64()?;
966            let med_val = data[indices[1]].0.to_f64()?;
967            let max_val = data[indices[2]].0.to_f64()?;
968            return Some((min_val, med_val, max_val));
969        }
970        _ => {}
971    }
972
973    let k = len / 4;
974    let remainder = len % 4;
975
976    let mut indices: Vec<usize> = (0..len).collect();
977    let cmp = |a: &usize, b: &usize| {
978        data[*a]
979            .partial_cmp(&data[*b])
980            .unwrap_or(std::cmp::Ordering::Less)
981    };
982
983    // Collect the unique positions we need in ascending order.
984    // By selecting in ascending order, each select_nth_unstable_by partitions
985    // the array so subsequent selections operate on progressively smaller slices.
986    // We deduplicate because adjacent quartile boundaries can overlap for small k.
987    let raw_positions: Vec<usize> = match remainder {
988        0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
989        1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
990        2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
991        _ => vec![k, 2 * k + 1, 3 * k + 2],
992    };
993
994    let mut unique_positions = raw_positions.clone();
995    unique_positions.dedup();
996
997    // Select each unique position in ascending order, narrowing the search range
998    let mut start = 0;
999    for &pos in &unique_positions {
1000        indices[start..].select_nth_unstable_by(pos - start, &cmp);
1001        start = pos + 1;
1002    }
1003
1004    // Now read all needed values (including duplicates) from the partitioned indices
1005    let values: Vec<f64> = raw_positions
1006        .iter()
1007        .map(|&pos| data[indices[pos]].0.to_f64())
1008        .collect::<Option<Vec<_>>>()?;
1009
1010    match remainder {
1011        0 => {
1012            let q1 = f64::midpoint(values[0], values[1]);
1013            let q2 = f64::midpoint(values[2], values[3]);
1014            let q3 = f64::midpoint(values[4], values[5]);
1015            Some((q1, q2, q3))
1016        }
1017        1 => {
1018            let q1 = f64::midpoint(values[0], values[1]);
1019            let q2 = values[2];
1020            let q3 = f64::midpoint(values[3], values[4]);
1021            Some((q1, q2, q3))
1022        }
1023        2 => {
1024            let q1 = values[0];
1025            let q2 = f64::midpoint(values[1], values[2]);
1026            let q3 = values[3];
1027            Some((q1, q2, q3))
1028        }
1029        _ => Some((values[0], values[1], values[2])),
1030    }
1031}
1032
1033fn mode_on_sorted<T, I>(it: I) -> Option<T>
1034where
1035    T: PartialOrd,
1036    I: Iterator<Item = T>,
1037{
1038    use std::cmp::Ordering;
1039
1040    // This approach to computing the mode works very nicely when the
1041    // number of samples is large and is close to its cardinality.
1042    // In other cases, a hashmap would be much better.
1043    // But really, how can we know this when given an arbitrary stream?
1044    // Might just switch to a hashmap to track frequencies. That would also
1045    // be generally useful for discovering the cardinality of a sample.
1046    let (mut mode, mut next) = (None, None);
1047    let (mut mode_count, mut next_count) = (0usize, 0usize);
1048    for x in it {
1049        if mode.as_ref() == Some(&x) {
1050            mode_count += 1;
1051        } else if next.as_ref() == Some(&x) {
1052            next_count += 1;
1053        } else {
1054            next = Some(x);
1055            next_count = 0;
1056        }
1057
1058        match next_count.cmp(&mode_count) {
1059            Ordering::Greater => {
1060                mode = next;
1061                mode_count = next_count;
1062                next = None;
1063                next_count = 0;
1064            }
1065            Ordering::Equal => {
1066                mode = None;
1067                mode_count = 0;
1068            }
1069            Ordering::Less => {}
1070        }
1071    }
1072    mode
1073}
1074
1075/// Computes both modes and antimodes from a sorted slice of values.
1076/// This version works with references to avoid unnecessary cloning.
1077///
1078/// # Arguments
1079///
1080/// * `data` - A sorted slice of values
1081///
1082/// # Notes
1083///
1084/// - Mode is the most frequently occurring value(s)
1085/// - Antimode is the least frequently occurring value(s)
1086/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1087/// - For empty slices, returns empty vectors and zero counts
1088/// - For single value slices, returns that value as the mode and empty antimode
1089/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1090///
1091/// # Returns
1092///
1093/// A tuple containing:
1094/// * Modes information: `(Vec<T>, usize, u32)` where:
1095///   - Vec<T>: Vector containing the mode values
1096///   - usize: Number of modes found
1097///   - u32: Frequency/count of the mode values
1098/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1099///   - Vec<T>: Vector containing up to 10 antimode values
1100///   - usize: Total number of antimodes
1101///   - u32: Frequency/count of the antimode values
1102#[allow(clippy::type_complexity)]
1103#[inline]
1104fn modes_and_antimodes_on_sorted_slice<T>(
1105    data: &[Partial<T>],
1106) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1107where
1108    T: PartialOrd + Clone,
1109{
1110    let size = data.len();
1111
1112    // Early return for empty slice
1113    if size == 0 {
1114        core::hint::cold_path();
1115        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1116    }
1117
1118    // Estimate capacity using integer square root of size
1119    let sqrt_size = size.isqrt();
1120    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1121
1122    let mut current_value = &data[0].0;
1123    let mut current_count = 1;
1124    let mut highest_count = 1;
1125    let mut lowest_count = u32::MAX;
1126
1127    // Count consecutive runs - optimized to reduce allocations
1128    for x in data.iter().skip(1) {
1129        if x.0 == *current_value {
1130            current_count += 1;
1131            highest_count = highest_count.max(current_count);
1132        } else {
1133            runs.push((current_value, current_count));
1134            lowest_count = lowest_count.min(current_count);
1135            current_value = &x.0;
1136            current_count = 1;
1137        }
1138    }
1139    runs.push((current_value, current_count));
1140    lowest_count = lowest_count.min(current_count);
1141
1142    // Early return if only one unique value
1143    if runs.len() == 1 {
1144        let (val, count) = runs.pop().unwrap();
1145        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1146    }
1147
1148    // Special case: if all values appear exactly once
1149    if highest_count == 1 {
1150        let antimodes_count = runs.len().min(10);
1151        let total_count = runs.len();
1152        let mut antimodes = Vec::with_capacity(antimodes_count);
1153        for (val, _) in runs.into_iter().take(antimodes_count) {
1154            antimodes.push(val.clone());
1155        }
1156        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1157        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1158    }
1159
1160    // Collect modes and antimodes directly in a single pass, cloning values immediately
1161    // instead of collecting indices first and then cloning in a second pass
1162    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1163    let estimated_antimodes = 10.min(runs.len());
1164
1165    let mut modes_result = Vec::with_capacity(estimated_modes);
1166    let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
1167    let mut mode_count = 0;
1168    let mut antimodes_count = 0;
1169    let mut antimodes_collected = 0_u32;
1170
1171    for (val, count) in &runs {
1172        if *count == highest_count {
1173            modes_result.push((*val).clone());
1174            mode_count += 1;
1175        }
1176        if *count == lowest_count {
1177            antimodes_count += 1;
1178            if antimodes_collected < 10 {
1179                antimodes_result.push((*val).clone());
1180                antimodes_collected += 1;
1181            }
1182        }
1183    }
1184
1185    (
1186        (modes_result, mode_count, highest_count),
1187        (antimodes_result, antimodes_count, lowest_count),
1188    )
1189}
1190
1191/// A commutative data structure for lazily sorted sequences of data.
1192///
1193/// The sort does not occur until statistics need to be computed.
1194///
1195/// Note that this works on types that do not define a total ordering like
1196/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1197/// is returned.
1198#[allow(clippy::unsafe_derive_deserialize)]
1199#[derive(Clone, Serialize, Deserialize)]
1200pub struct Unsorted<T> {
1201    /// Internal cache flag indicating whether `data` is currently sorted.
1202    /// This field is skipped during serialization and deserialization.
1203    #[serde(skip)]
1204    sorted: bool,
1205    data: Vec<Partial<T>>,
1206}
1207
1208// Manual PartialEq/Eq: ignore `sorted` cache flag so equality reflects
1209// logical contents only (two Unsorted with same data compare equal
1210// regardless of whether one has been sorted).
1211impl<T: PartialEq> PartialEq for Unsorted<T> {
1212    fn eq(&self, other: &Self) -> bool {
1213        self.data == other.data
1214    }
1215}
1216
1217impl<T: PartialEq> Eq for Unsorted<T> where Partial<T>: Eq {}
1218
1219impl<T: PartialOrd + Send> Unsorted<T> {
1220    /// Create initial empty state.
1221    #[inline]
1222    #[must_use]
1223    pub fn new() -> Unsorted<T> {
1224        Default::default()
1225    }
1226
1227    /// Add a new element to the set.
1228    #[allow(clippy::inline_always)]
1229    #[inline(always)]
1230    pub fn add(&mut self, v: T) {
1231        self.sorted = false;
1232        self.data.push(Partial(v));
1233    }
1234
1235    /// Return the number of data points.
1236    #[inline]
1237    #[must_use]
1238    pub const fn len(&self) -> usize {
1239        self.data.len()
1240    }
1241
1242    #[inline]
1243    #[must_use]
1244    pub const fn is_empty(&self) -> bool {
1245        self.data.is_empty()
1246    }
1247
1248    #[inline]
1249    fn sort(&mut self) {
1250        if !self.sorted {
1251            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1252            if self.data.len() < PARALLEL_THRESHOLD {
1253                self.data.sort_unstable();
1254            } else {
1255                self.data.par_sort_unstable();
1256            }
1257            self.sorted = true;
1258        }
1259    }
1260
1261    #[inline]
1262    const fn already_sorted(&mut self) {
1263        self.sorted = true;
1264    }
1265
1266    /// Add multiple elements efficiently
1267    #[inline]
1268    pub fn add_bulk(&mut self, values: Vec<T>) {
1269        self.sorted = false;
1270        self.data.reserve(values.len());
1271        self.data.extend(values.into_iter().map(Partial));
1272    }
1273
1274    /// Shrink capacity to fit current data
1275    #[inline]
1276    pub fn shrink_to_fit(&mut self) {
1277        self.data.shrink_to_fit();
1278    }
1279
1280    /// Create with specific capacity
1281    #[inline]
1282    #[must_use]
1283    pub fn with_capacity(capacity: usize) -> Self {
1284        Unsorted {
1285            sorted: true,
1286            data: Vec::with_capacity(capacity),
1287        }
1288    }
1289
1290    /// Add a value assuming it's greater than all existing values
1291    #[inline]
1292    pub fn push_ascending(&mut self, value: T) {
1293        if let Some(last) = self.data.last() {
1294            debug_assert!(last.0 <= value, "Value must be >= than last element");
1295        }
1296        self.data.push(Partial(value));
1297        // Data remains sorted
1298    }
1299}
1300
1301impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
1302    #[inline]
1303    /// Returns the cardinality of the data.
1304    /// Set `sorted` to `true` if the data is already sorted.
1305    /// Set `parallel_threshold` to `0` to force sequential processing.
1306    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1307    /// Set `parallel_threshold` to `2` to force parallel processing.
1308    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1309    /// greater than the default threshold of `10_000`.
1310    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1311        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1312        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1313
1314        let len = self.data.len();
1315        match len {
1316            0 => return 0,
1317            1 => return 1,
1318            _ => {}
1319        }
1320
1321        if sorted {
1322            self.already_sorted();
1323        } else {
1324            self.sort();
1325        }
1326
1327        let use_parallel = parallel_threshold != 0
1328            && (parallel_threshold == 1
1329                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1330
1331        if use_parallel {
1332            // Parallel processing using chunks via fold/reduce — no intermediate Vec.
1333            // Reduction state: (count, leftmost_first, rightmost_last). Associative:
1334            // combining (cL, fL, lL) with (cR, fR, lR) yields (cL+cR - [lL==fR], fL, lR).
1335            self.data
1336                .par_chunks(CHUNK_SIZE)
1337                .map(|chunk| {
1338                    // Count unique elements within this chunk
1339                    let mut count = u64::from(!chunk.is_empty());
1340                    for [a, b] in chunk.array_windows::<2>() {
1341                        if a != b {
1342                            count += 1;
1343                        }
1344                    }
1345                    (count, chunk.first(), chunk.last())
1346                })
1347                .reduce(
1348                    || (0u64, None, None),
1349                    |(cl, fl, ll), (cr, fr, lr)| match (ll, fr) {
1350                        // `None` endpoints only arise from the identity today, but summing
1351                        // counts in both arms keeps the combiner correct even if a future
1352                        // mapper returns a non-empty chunk with `None` endpoints.
1353                        (None, _) => (cl + cr, fr, lr),
1354                        (_, None) => (cl + cr, fl, ll),
1355                        (Some(l), Some(r)) => {
1356                            let adj = u64::from(l == r);
1357                            (cl + cr - adj, fl, lr)
1358                        }
1359                    },
1360                )
1361                .0
1362        } else {
1363            // Sequential processing
1364
1365            // the statement below is equivalent to:
1366            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1367            let mut count = u64::from(!self.data.is_empty());
1368
1369            for [a, b] in self.data.array_windows::<2>() {
1370                if a != b {
1371                    count += 1;
1372                }
1373            }
1374            count
1375        }
1376    }
1377}
1378
1379impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1380    /// Returns the mode of the data.
1381    #[inline]
1382    pub fn mode(&mut self) -> Option<T> {
1383        if self.data.is_empty() {
1384            return None;
1385        }
1386        self.sort();
1387        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1388    }
1389
1390    /// Returns the modes of the data.
1391    /// Note that there is also a `frequency::mode()` function that return one mode
1392    /// with the highest frequency. If there is a tie, it returns None.
1393    #[inline]
1394    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1395        if self.data.is_empty() {
1396            return (Vec::new(), 0, 0);
1397        }
1398        self.sort();
1399        modes_and_antimodes_on_sorted_slice(&self.data).0
1400    }
1401
1402    /// Returns the antimodes of the data.
1403    /// `antimodes_result` only returns the first 10 antimodes
1404    #[inline]
1405    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1406        if self.data.is_empty() {
1407            return (Vec::new(), 0, 0);
1408        }
1409        self.sort();
1410        modes_and_antimodes_on_sorted_slice(&self.data).1
1411    }
1412
1413    /// Returns the modes and antimodes of the data.
1414    /// `antimodes_result` only returns the first 10 antimodes
1415    #[allow(clippy::type_complexity)]
1416    #[inline]
1417    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1418        if self.data.is_empty() {
1419            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1420        }
1421        self.sort();
1422        modes_and_antimodes_on_sorted_slice(&self.data)
1423    }
1424}
1425
1426impl Unsorted<Vec<u8>> {
1427    /// Add a byte slice, converting to `Vec<u8>` internally.
1428    ///
1429    /// This is a convenience method that avoids requiring the caller to call
1430    /// `.to_vec()` before `add()`. The allocation still occurs internally,
1431    /// but the API is cleaner and opens the door for future optimizations
1432    /// (e.g., frequency-map deduplication for high-cardinality data).
1433    #[allow(clippy::inline_always)]
1434    #[inline(always)]
1435    pub fn add_bytes(&mut self, v: &[u8]) {
1436        self.sorted = false;
1437        self.data.push(Partial(v.to_vec()));
1438    }
1439}
1440
1441impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1442    /// Returns the median of the data.
1443    #[inline]
1444    pub fn median(&mut self) -> Option<f64> {
1445        if self.data.is_empty() {
1446            return None;
1447        }
1448        self.sort();
1449        median_on_sorted(&self.data)
1450    }
1451}
1452
1453impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1454    /// Returns the Median Absolute Deviation (MAD) of the data.
1455    #[inline]
1456    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1457        if self.data.is_empty() {
1458            return None;
1459        }
1460        if existing_median.is_none() {
1461            self.sort();
1462        }
1463        mad_on_sorted(&self.data, existing_median)
1464    }
1465}
1466
1467impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1468    /// Returns the quartiles of the data using the traditional sorting approach.
1469    ///
1470    /// This method sorts the data first and then computes quartiles.
1471    /// Time complexity: O(n log n)
1472    #[inline]
1473    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1474        if self.data.is_empty() {
1475            return None;
1476        }
1477        self.sort();
1478        quartiles_on_sorted(&self.data)
1479    }
1480}
1481
1482impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1483    /// Returns the Gini Coefficient of the data.
1484    ///
1485    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1486    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1487    /// Time complexity: O(n log n)
1488    #[inline]
1489    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1490        if self.data.is_empty() {
1491            return None;
1492        }
1493        self.sort();
1494        gini_on_sorted(&self.data, precalc_sum)
1495    }
1496
1497    /// Returns the kurtosis (excess kurtosis) of the data.
1498    ///
1499    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1500    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1501    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1502    /// Time complexity: O(n log n)
1503    #[inline]
1504    pub fn kurtosis(
1505        &mut self,
1506        precalc_mean: Option<f64>,
1507        precalc_variance: Option<f64>,
1508    ) -> Option<f64> {
1509        if self.data.is_empty() {
1510            return None;
1511        }
1512        self.sort();
1513        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1514    }
1515
1516    /// Returns the percentile rank of a value in the data.
1517    ///
1518    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1519    /// values, returns 0.0. If greater than all, returns 100.0.
1520    /// This method sorts the data first and then computes the percentile rank.
1521    /// Time complexity: O(n log n)
1522    #[inline]
1523    #[allow(clippy::needless_pass_by_value)]
1524    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1525    where
1526        V: PartialOrd + ToPrimitive,
1527    {
1528        if self.data.is_empty() {
1529            return None;
1530        }
1531        self.sort();
1532        percentile_rank_on_sorted(&self.data, &value)
1533    }
1534
1535    /// Returns the Atkinson Index of the data.
1536    ///
1537    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1538    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1539    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1540    /// This method sorts the data first and then computes the Atkinson index.
1541    /// Time complexity: O(n log n)
1542    ///
1543    /// # Arguments
1544    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1545    ///   - 0.0: No inequality aversion (returns 0)
1546    ///   - 0.5: Moderate aversion
1547    ///   - 1.0: Uses geometric mean (special case)
1548    ///   - 2.0: High aversion
1549    /// * `precalc_mean` - Optional pre-calculated mean
1550    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1551    #[inline]
1552    pub fn atkinson(
1553        &mut self,
1554        epsilon: f64,
1555        precalc_mean: Option<f64>,
1556        precalc_geometric_sum: Option<f64>,
1557    ) -> Option<f64> {
1558        if self.data.is_empty() {
1559            return None;
1560        }
1561        self.sort();
1562        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1563    }
1564}
1565
1566impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
1567    /// Returns the quartiles of the data using selection algorithm.
1568    ///
1569    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1570    /// in O(n) average time complexity instead of O(n log n) sorting.
1571    /// Requires T to implement Clone to create a working copy of the data.
1572    ///
1573    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1574    /// is often slower than the sorting-based approach for small to medium datasets due to:
1575    /// - Need to find multiple order statistics (3 separate quickselect calls)
1576    /// - Overhead of copying data to avoid mutation
1577    /// - Rayon's highly optimized parallel sorting
1578    #[inline]
1579    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1580        if self.data.is_empty() {
1581            return None;
1582        }
1583        // Use zero-copy approach (indices-based) to avoid cloning all elements
1584        quartiles_with_zero_copy_selection(&self.data)
1585    }
1586}
1587
1588impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1589    /// Returns the quartiles using zero-copy selection algorithm.
1590    ///
1591    /// This implementation avoids copying data by working with indices instead,
1592    /// providing better performance than the clone-based selection approach.
1593    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1594    #[inline]
1595    #[must_use]
1596    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1597        if self.data.is_empty() {
1598            return None;
1599        }
1600        quartiles_with_zero_copy_selection(&self.data)
1601    }
1602}
1603
1604impl<T: PartialOrd + Send> Commute for Unsorted<T> {
1605    #[inline]
1606    fn merge(&mut self, mut v: Unsorted<T>) {
1607        if v.is_empty() {
1608            return;
1609        }
1610
1611        self.sorted = false;
1612        // we use std::mem::take to avoid unnecessary allocations
1613        self.data.extend(std::mem::take(&mut v.data));
1614    }
1615}
1616
1617impl<T: PartialOrd> Default for Unsorted<T> {
1618    #[inline]
1619    fn default() -> Unsorted<T> {
1620        Unsorted {
1621            data: Vec::with_capacity(16),
1622            sorted: true, // empty is sorted
1623        }
1624    }
1625}
1626
1627impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
1628    #[inline]
1629    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1630        let mut v = Unsorted::new();
1631        v.extend(it);
1632        v
1633    }
1634}
1635
1636impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1637    #[inline]
1638    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1639        self.sorted = false;
1640        self.data.extend(it.into_iter().map(Partial));
1641    }
1642}
1643
1644fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1645where
1646    T: PartialOrd + Clone,
1647{
1648    let len = data.len();
1649
1650    // Early return for empty array or invalid percentiles
1651    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1652        return None;
1653    }
1654
1655    // Optimize: Check if percentiles are already sorted and unique
1656    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1657        // Single or empty percentile - no need to sort/dedup
1658        percentiles.to_vec()
1659    } else {
1660        // Check if already sorted and unique (common case)
1661        let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
1662
1663        if is_sorted_unique {
1664            // Already sorted and unique, use directly without cloning
1665            percentiles.to_vec()
1666        } else {
1667            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1668            let mut seen = [false; 101];
1669            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1670            for &p in percentiles {
1671                if !seen[p as usize] {
1672                    seen[p as usize] = true;
1673                    sorted_unique.push(p);
1674                }
1675            }
1676            sorted_unique.sort_unstable();
1677            sorted_unique
1678        }
1679    };
1680
1681    let mut results = Vec::with_capacity(unique_percentiles.len());
1682
1683    // SAFETY: All index calculations below are guaranteed to be in bounds
1684    // because we've verified len > 0 and the rank calculation ensures
1685    // the index is within bounds
1686    unsafe {
1687        for &p in &unique_percentiles {
1688            // Calculate the ordinal rank using nearest-rank method
1689            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1690            // n = ⌈(P/100) × N⌉
1691            #[allow(clippy::cast_sign_loss)]
1692            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1693
1694            // Convert to 0-based index
1695            let idx = rank.saturating_sub(1);
1696
1697            // Get the value at that rank and extract the inner value
1698            results.push(data.get_unchecked(idx).0.clone());
1699        }
1700    }
1701
1702    Some(results)
1703}
1704
1705impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1706    /// Returns the requested percentiles of the data.
1707    ///
1708    /// Uses the nearest-rank method to compute percentiles.
1709    /// Each returned value is an actual value from the dataset.
1710    ///
1711    /// # Arguments
1712    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1713    ///
1714    /// # Returns
1715    /// * `None` if the data is empty or if any percentile is > 100
1716    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1717    ///
1718    /// # Example
1719    /// ```
1720    /// use stats::Unsorted;
1721    /// let mut data = Unsorted::new();
1722    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1723    /// let percentiles = vec![25, 50, 75];
1724    /// let results = data.custom_percentiles(&percentiles).unwrap();
1725    /// assert_eq!(results, vec![3, 5, 8]);
1726    /// ```
1727    #[inline]
1728    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1729        if self.data.is_empty() {
1730            return None;
1731        }
1732        self.sort();
1733        custom_percentiles_on_sorted(&self.data, percentiles)
1734    }
1735}
1736
1737#[cfg(test)]
1738mod test {
1739    use super::*;
1740
1741    #[test]
1742    fn test_cardinality_empty() {
1743        let mut unsorted: Unsorted<i32> = Unsorted::new();
1744        assert_eq!(unsorted.cardinality(false, 1), 0);
1745    }
1746
1747    #[test]
1748    fn test_cardinality_single_element() {
1749        let mut unsorted = Unsorted::new();
1750        unsorted.add(5);
1751        assert_eq!(unsorted.cardinality(false, 1), 1);
1752    }
1753
1754    #[test]
1755    fn test_cardinality_unique_elements() {
1756        let mut unsorted = Unsorted::new();
1757        unsorted.extend(vec![1, 2, 3, 4, 5]);
1758        assert_eq!(unsorted.cardinality(false, 1), 5);
1759    }
1760
1761    #[test]
1762    fn test_cardinality_duplicate_elements() {
1763        let mut unsorted = Unsorted::new();
1764        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1765        assert_eq!(unsorted.cardinality(false, 1), 4);
1766    }
1767
1768    #[test]
1769    fn test_cardinality_all_same() {
1770        let mut unsorted = Unsorted::new();
1771        unsorted.extend(vec![1; 100]);
1772        assert_eq!(unsorted.cardinality(false, 1), 1);
1773    }
1774
1775    #[test]
1776    fn test_cardinality_large_range() {
1777        let mut unsorted = Unsorted::new();
1778        unsorted.extend(0..1_000_000);
1779        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1780    }
1781
1782    #[test]
1783    fn test_cardinality_large_range_sequential() {
1784        let mut unsorted = Unsorted::new();
1785        unsorted.extend(0..1_000_000);
1786        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1787    }
1788
1789    #[test]
1790    fn test_cardinality_presorted() {
1791        let mut unsorted = Unsorted::new();
1792        unsorted.extend(vec![1, 2, 3, 4, 5]);
1793        unsorted.sort();
1794        assert_eq!(unsorted.cardinality(true, 1), 5);
1795    }
1796
1797    #[test]
1798    fn test_cardinality_float() {
1799        let mut unsorted = Unsorted::new();
1800        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1801        assert_eq!(unsorted.cardinality(false, 1), 4);
1802    }
1803
1804    #[test]
1805    fn test_cardinality_string() {
1806        let mut unsorted = Unsorted::new();
1807        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1808        assert_eq!(unsorted.cardinality(false, 1), 3);
1809    }
1810
1811    #[test]
1812    fn test_quartiles_selection_vs_sorted() {
1813        // Test that selection-based quartiles gives same results as sorting-based
1814        let test_cases = vec![
1815            vec![3, 5, 7, 9],
1816            vec![3, 5, 7],
1817            vec![1, 2, 7, 11],
1818            vec![3, 5, 7, 9, 12],
1819            vec![2, 2, 3, 8, 10],
1820            vec![3, 5, 7, 9, 12, 20],
1821            vec![0, 2, 4, 8, 10, 11],
1822            vec![3, 5, 7, 9, 12, 20, 21],
1823            vec![1, 5, 6, 6, 7, 10, 19],
1824        ];
1825
1826        for test_case in test_cases {
1827            let mut unsorted1 = Unsorted::new();
1828            let mut unsorted2 = Unsorted::new();
1829            let mut unsorted3 = Unsorted::new();
1830            unsorted1.extend(test_case.clone());
1831            unsorted2.extend(test_case.clone());
1832            unsorted3.extend(test_case.clone());
1833
1834            let result_sorted = unsorted1.quartiles();
1835            let result_selection = unsorted2.quartiles_with_selection();
1836            let result_zero_copy = unsorted3.quartiles_zero_copy();
1837
1838            assert_eq!(
1839                result_sorted, result_selection,
1840                "Selection mismatch for test case: {:?}",
1841                test_case
1842            );
1843            assert_eq!(
1844                result_sorted, result_zero_copy,
1845                "Zero-copy mismatch for test case: {:?}",
1846                test_case
1847            );
1848        }
1849    }
1850
1851    #[test]
1852    fn test_quartiles_with_selection_small() {
1853        // Test edge cases for selection-based quartiles
1854        let mut unsorted: Unsorted<i32> = Unsorted::new();
1855        assert_eq!(unsorted.quartiles_with_selection(), None);
1856
1857        let mut unsorted = Unsorted::new();
1858        unsorted.extend(vec![1, 2]);
1859        assert_eq!(unsorted.quartiles_with_selection(), None);
1860
1861        let mut unsorted = Unsorted::new();
1862        unsorted.extend(vec![1, 2, 3]);
1863        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1864    }
1865
1866    #[test]
1867    fn test_quickselect() {
1868        let data = vec![
1869            Partial(3),
1870            Partial(1),
1871            Partial(4),
1872            Partial(1),
1873            Partial(5),
1874            Partial(9),
1875            Partial(2),
1876            Partial(6),
1877        ];
1878
1879        // Test finding different positions
1880        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1881        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1882        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1883
1884        // Test edge cases
1885        let mut empty: Vec<Partial<i32>> = vec![];
1886        assert_eq!(quickselect(&mut empty, 0), None);
1887
1888        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1889        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1890    }
1891
1892    #[test]
1893    fn median_stream() {
1894        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1895        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1896    }
1897
1898    #[test]
1899    fn mad_stream() {
1900        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1901        assert_eq!(
1902            mad(
1903                vec![
1904                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1905                    50, 61, 39, 88, 5, 13, 64
1906                ]
1907                .into_iter(),
1908                None
1909            ),
1910            Some(16.0)
1911        );
1912    }
1913
1914    #[test]
1915    fn mad_stream_precalc_median() {
1916        let data = vec![3usize, 5, 7, 9].into_iter();
1917        let median1 = median(data.clone());
1918        assert_eq!(mad(data, median1), Some(2.0));
1919
1920        let data2 = vec![
1921            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1922            39, 88, 5, 13, 64,
1923        ]
1924        .into_iter();
1925        let median2 = median(data2.clone());
1926        assert_eq!(mad(data2, median2), Some(16.0));
1927    }
1928
1929    #[test]
1930    fn mode_stream() {
1931        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1932        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1933        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1934        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1935        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1936    }
1937
1938    #[test]
1939    fn median_floats() {
1940        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1941        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1942    }
1943
1944    #[test]
1945    fn mode_floats() {
1946        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1947        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1948        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1949        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1950        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1951    }
1952
1953    #[test]
1954    fn modes_stream() {
1955        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1956        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1957        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1958        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1959        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1960        let vec: Vec<u32> = vec![];
1961        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1962    }
1963
1964    #[test]
1965    fn modes_floats() {
1966        assert_eq!(
1967            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1968            (vec![], 0, 0)
1969        );
1970        assert_eq!(
1971            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1972            (vec![3.0], 1, 4)
1973        );
1974        assert_eq!(
1975            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1976            (vec![3.0, 4.0], 2, 2)
1977        );
1978        assert_eq!(
1979            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1980            (vec![1.0, 3.0], 2, 2)
1981        );
1982    }
1983
1984    #[test]
1985    fn antimodes_stream() {
1986        assert_eq!(
1987            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1988            (vec![3, 5, 7, 9], 4, 1)
1989        );
1990        assert_eq!(
1991            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1992            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1993        );
1994        assert_eq!(
1995            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1996            (vec![1], 1, 1)
1997        );
1998        assert_eq!(
1999            antimodes(vec![3usize, 3, 4, 4].into_iter()),
2000            (vec![3, 4], 2, 2)
2001        );
2002        assert_eq!(
2003            antimodes(
2004                vec![
2005                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
2006                    14, 14, 15, 15
2007                ]
2008                .into_iter()
2009            ),
2010            // we only show the first 10 of the 13 antimodes
2011            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2012        );
2013        assert_eq!(
2014            antimodes(
2015                vec![
2016                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2017                    14, 14, 15, 15
2018                ]
2019                .into_iter()
2020            ),
2021            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2022        );
2023        assert_eq!(
2024            antimodes(vec![3usize, 3, 3, 4].into_iter()),
2025            (vec![4], 1, 1)
2026        );
2027        assert_eq!(
2028            antimodes(vec![4usize, 3, 3, 3].into_iter()),
2029            (vec![4], 1, 1)
2030        );
2031        assert_eq!(
2032            antimodes(vec![1usize, 1, 2, 2].into_iter()),
2033            (vec![1, 2], 2, 2)
2034        );
2035        let vec: Vec<u32> = vec![];
2036        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2037    }
2038
2039    #[test]
2040    fn antimodes_floats() {
2041        assert_eq!(
2042            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2043            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2044        );
2045        assert_eq!(
2046            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2047            (vec![], 0, 0)
2048        );
2049        assert_eq!(
2050            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2051            (vec![3.0, 4.0], 2, 2)
2052        );
2053        assert_eq!(
2054            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2055            (vec![2.0], 1, 1)
2056        );
2057    }
2058
2059    #[test]
2060    fn test_custom_percentiles() {
2061        // Test with integers
2062        let mut unsorted: Unsorted<i32> = Unsorted::new();
2063        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2064
2065        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2066        assert_eq!(result, vec![3, 6, 9]);
2067
2068        // Test with strings
2069        let mut str_data = Unsorted::new();
2070        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2071        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2072        assert_eq!(result, vec!["a", "b", "c", "d"]);
2073
2074        // Test with chars
2075        let mut char_data = Unsorted::new();
2076        char_data.extend('a'..='e');
2077        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2078        assert_eq!(result, vec!['b', 'c', 'd']);
2079
2080        // Test with floats
2081        let mut float_data = Unsorted::new();
2082        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2083        let result = float_data
2084            .custom_percentiles(&[10, 30, 50, 70, 90])
2085            .unwrap();
2086        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2087
2088        // Test with empty percentiles array
2089        let result = float_data.custom_percentiles(&[]).unwrap();
2090        assert_eq!(result, Vec::<f64>::new());
2091
2092        // Test with duplicate percentiles
2093        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2094        assert_eq!(result, vec![5.5]);
2095
2096        // Test with extreme percentiles
2097        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2098        assert_eq!(result, vec![1.1, 9.9]);
2099
2100        // Test with unsorted percentiles
2101        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2102        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2103
2104        // Test with single element
2105        let mut single = Unsorted::new();
2106        single.add(42);
2107        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2108        assert_eq!(result, vec![42, 42, 42]);
2109    }
2110
2111    #[test]
2112    fn quartiles_stream() {
2113        assert_eq!(
2114            quartiles(vec![3usize, 5, 7].into_iter()),
2115            Some((3., 5., 7.))
2116        );
2117        assert_eq!(
2118            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2119            Some((4., 6., 8.))
2120        );
2121        assert_eq!(
2122            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2123            Some((1.5, 4.5, 9.))
2124        );
2125        assert_eq!(
2126            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2127            Some((4., 7., 10.5))
2128        );
2129        assert_eq!(
2130            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2131            Some((2., 3., 9.))
2132        );
2133        assert_eq!(
2134            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2135            Some((5., 8., 12.))
2136        );
2137        assert_eq!(
2138            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2139            Some((2., 6., 10.))
2140        );
2141        assert_eq!(
2142            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2143            Some((5., 9., 20.))
2144        );
2145        assert_eq!(
2146            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2147            Some((5., 6., 10.))
2148        );
2149    }
2150
2151    #[test]
2152    fn quartiles_floats() {
2153        assert_eq!(
2154            quartiles(vec![3_f64, 5., 7.].into_iter()),
2155            Some((3., 5., 7.))
2156        );
2157        assert_eq!(
2158            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2159            Some((4., 6., 8.))
2160        );
2161        assert_eq!(
2162            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2163            Some((4., 7., 10.5))
2164        );
2165        assert_eq!(
2166            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2167            Some((5., 8., 12.))
2168        );
2169        assert_eq!(
2170            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2171            Some((5., 9., 20.))
2172        );
2173    }
2174
2175    #[test]
2176    fn test_quartiles_zero_copy_small() {
2177        // Test edge cases for zero-copy quartiles
2178        let unsorted: Unsorted<i32> = Unsorted::new();
2179        assert_eq!(unsorted.quartiles_zero_copy(), None);
2180
2181        let mut unsorted = Unsorted::new();
2182        unsorted.extend(vec![1, 2]);
2183        assert_eq!(unsorted.quartiles_zero_copy(), None);
2184
2185        let mut unsorted = Unsorted::new();
2186        unsorted.extend(vec![1, 2, 3]);
2187        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2188
2189        // Test larger case
2190        let mut unsorted = Unsorted::new();
2191        unsorted.extend(vec![3, 5, 7, 9]);
2192        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2193    }
2194
2195    #[test]
2196    fn gini_empty() {
2197        let mut unsorted: Unsorted<i32> = Unsorted::new();
2198        assert_eq!(unsorted.gini(None), None);
2199        let empty_vec: Vec<i32> = vec![];
2200        assert_eq!(gini(empty_vec.into_iter(), None), None);
2201    }
2202
2203    #[test]
2204    fn gini_single_element() {
2205        let mut unsorted = Unsorted::new();
2206        unsorted.add(5);
2207        assert_eq!(unsorted.gini(None), Some(0.0));
2208        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2209    }
2210
2211    #[test]
2212    fn gini_perfect_equality() {
2213        // All values are the same - perfect equality, Gini = 0
2214        let mut unsorted = Unsorted::new();
2215        unsorted.extend(vec![10, 10, 10, 10, 10]);
2216        let result = unsorted.gini(None).unwrap();
2217        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2218
2219        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2220    }
2221
2222    #[test]
2223    fn gini_perfect_inequality() {
2224        // One value has everything, others have zero - perfect inequality
2225        // For [0, 0, 0, 0, 100], Gini should be close to 1
2226        let mut unsorted = Unsorted::new();
2227        unsorted.extend(vec![0, 0, 0, 0, 100]);
2228        let result = unsorted.gini(None).unwrap();
2229        // Perfect inequality should give Gini close to 1
2230        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2231        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2232    }
2233
2234    #[test]
2235    fn gini_stream() {
2236        // Test with known values
2237        // For [1, 2, 3, 4, 5]:
2238        // sum = 15
2239        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2240        // n = 5
2241        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2242        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2243        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2244        assert!(
2245            (result - expected).abs() < 1e-10,
2246            "Expected {}, got {}",
2247            expected,
2248            result
2249        );
2250    }
2251
2252    #[test]
2253    fn gini_floats() {
2254        let mut unsorted = Unsorted::new();
2255        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2256        let result = unsorted.gini(None).unwrap();
2257        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2258        assert!((result - expected).abs() < 1e-10);
2259
2260        assert!(
2261            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2262                < 1e-10
2263        );
2264    }
2265
2266    #[test]
2267    fn gini_all_zeros() {
2268        // All zeros - sum is zero, Gini is undefined
2269        let mut unsorted = Unsorted::new();
2270        unsorted.extend(vec![0, 0, 0, 0]);
2271        assert_eq!(unsorted.gini(None), None);
2272        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2273    }
2274
2275    #[test]
2276    fn gini_negative_values() {
2277        // Test with negative values (mathematically valid)
2278        let mut unsorted = Unsorted::new();
2279        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2280        let result = unsorted.gini(None);
2281        // Sum is 0, so Gini is undefined
2282        assert_eq!(result, None);
2283
2284        // Test with negative values that don't sum to zero
2285        let mut unsorted = Unsorted::new();
2286        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2287        let result = unsorted.gini(None);
2288        // Sum is 0, so Gini is undefined
2289        assert_eq!(result, None);
2290
2291        // Test with values containing negatives that sum to non-zero
2292        // Gini is undefined for negative values, should return None
2293        let mut unsorted = Unsorted::new();
2294        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2295        let result = unsorted.gini(None);
2296        assert_eq!(result, None);
2297    }
2298
2299    #[test]
2300    fn gini_known_cases() {
2301        // Test case: [1, 1, 1, 1, 1] - perfect equality
2302        let mut unsorted = Unsorted::new();
2303        unsorted.extend(vec![1, 1, 1, 1, 1]);
2304        let result = unsorted.gini(None).unwrap();
2305        assert!((result - 0.0).abs() < 1e-10);
2306
2307        // Test case: [0, 0, 0, 0, 1] - high inequality
2308        let mut unsorted = Unsorted::new();
2309        unsorted.extend(vec![0, 0, 0, 0, 1]);
2310        let result = unsorted.gini(None).unwrap();
2311        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2312        assert!((result - 0.8).abs() < 1e-10);
2313
2314        // Test case: [1, 2, 3] - moderate inequality
2315        let mut unsorted = Unsorted::new();
2316        unsorted.extend(vec![1, 2, 3]);
2317        let result = unsorted.gini(None).unwrap();
2318        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2319        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2320        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2321        assert!((result - expected).abs() < 1e-10);
2322    }
2323
2324    #[test]
2325    fn gini_precalc_sum() {
2326        // Test with pre-calculated sum
2327        let mut unsorted = Unsorted::new();
2328        unsorted.extend(vec![1, 2, 3, 4, 5]);
2329        let precalc_sum = Some(15.0);
2330        let result = unsorted.gini(precalc_sum).unwrap();
2331        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2332        assert!((result - expected).abs() < 1e-10);
2333
2334        // Test that pre-calculated sum gives same result
2335        let mut unsorted2 = Unsorted::new();
2336        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2337        let result2 = unsorted2.gini(None).unwrap();
2338        assert!((result - result2).abs() < 1e-10);
2339    }
2340
2341    #[test]
2342    fn gini_large_dataset() {
2343        // Test with larger dataset to exercise parallel path
2344        let data: Vec<i32> = (1..=1000).collect();
2345        let result = gini(data.iter().copied(), None);
2346        assert!(result.is_some());
2347        let gini_val = result.unwrap();
2348        // For uniform distribution, Gini should be positive but not too high
2349        assert!(gini_val > 0.0 && gini_val < 0.5);
2350    }
2351
2352    #[test]
2353    fn gini_unsorted_vs_sorted() {
2354        // Test that sorting doesn't affect result
2355        let mut unsorted1 = Unsorted::new();
2356        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2357        let result1 = unsorted1.gini(None).unwrap();
2358
2359        let mut unsorted2 = Unsorted::new();
2360        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2361        let result2 = unsorted2.gini(None).unwrap();
2362
2363        assert!((result1 - result2).abs() < 1e-10);
2364    }
2365
2366    #[test]
2367    fn gini_small_values() {
2368        // Test with very small values
2369        let mut unsorted = Unsorted::new();
2370        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2371        let result = unsorted.gini(None);
2372        assert!(result.is_some());
2373        // Should be same as [1, 2, 3, 4, 5] scaled down
2374        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2375        assert!((result.unwrap() - expected).abs() < 1e-10);
2376    }
2377
2378    #[test]
2379    fn gini_large_values() {
2380        // Test with large values
2381        let mut unsorted = Unsorted::new();
2382        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2383        let result = unsorted.gini(None);
2384        assert!(result.is_some());
2385        // Should be same as [1, 2, 3, 4, 5] scaled up
2386        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2387        assert!((result.unwrap() - expected).abs() < 1e-10);
2388    }
2389
2390    #[test]
2391    fn gini_two_elements() {
2392        // Test with exactly 2 elements
2393        let mut unsorted = Unsorted::new();
2394        unsorted.extend(vec![1, 2]);
2395        let result = unsorted.gini(None).unwrap();
2396        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2397        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2398        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2399        assert!((result - expected).abs() < 1e-10);
2400    }
2401
2402    #[test]
2403    fn gini_precalc_sum_zero() {
2404        // Test with pre-calculated sum of zero (should return None)
2405        let mut unsorted = Unsorted::new();
2406        unsorted.extend(vec![1, 2, 3, 4, 5]);
2407        let result = unsorted.gini(Some(0.0));
2408        assert_eq!(result, None);
2409    }
2410
2411    #[test]
2412    fn gini_precalc_sum_negative() {
2413        // Gini is undefined for negative values, should return None
2414        let mut unsorted = Unsorted::new();
2415        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2416        let result = unsorted.gini(None);
2417        assert_eq!(result, None);
2418
2419        // Negative precalculated sum should also return None
2420        let mut unsorted = Unsorted::new();
2421        unsorted.extend(vec![1, 2, 3]);
2422        let result = unsorted.gini(Some(-5.0));
2423        assert_eq!(result, None);
2424    }
2425
2426    #[test]
2427    fn gini_different_types() {
2428        // Test with different integer types
2429        let mut unsorted_u32 = Unsorted::new();
2430        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2431        let result_u32 = unsorted_u32.gini(None).unwrap();
2432
2433        let mut unsorted_i64 = Unsorted::new();
2434        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2435        let result_i64 = unsorted_i64.gini(None).unwrap();
2436
2437        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2438        assert!((result_u32 - expected).abs() < 1e-10);
2439        assert!((result_i64 - expected).abs() < 1e-10);
2440    }
2441
2442    #[test]
2443    fn gini_extreme_inequality() {
2444        // Test with extreme inequality: one very large value, many zeros
2445        let mut unsorted = Unsorted::new();
2446        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2447        let result = unsorted.gini(None).unwrap();
2448        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2449        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2450        assert!((result - 0.9).abs() < 1e-10);
2451    }
2452
2453    #[test]
2454    fn gini_duplicate_values() {
2455        // Test with many duplicate values
2456        let mut unsorted = Unsorted::new();
2457        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2458        let result = unsorted.gini(None);
2459        assert!(result.is_some());
2460        // Should be between 0 and 1
2461        let gini_val = result.unwrap();
2462        assert!((0.0..=1.0).contains(&gini_val));
2463    }
2464
2465    #[test]
2466    fn kurtosis_empty() {
2467        let mut unsorted: Unsorted<i32> = Unsorted::new();
2468        assert_eq!(unsorted.kurtosis(None, None), None);
2469        let empty_vec: Vec<i32> = vec![];
2470        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2471    }
2472
2473    #[test]
2474    fn kurtosis_small() {
2475        // Need at least 4 elements
2476        let mut unsorted = Unsorted::new();
2477        unsorted.extend(vec![1, 2]);
2478        assert_eq!(unsorted.kurtosis(None, None), None);
2479
2480        let mut unsorted = Unsorted::new();
2481        unsorted.extend(vec![1, 2, 3]);
2482        assert_eq!(unsorted.kurtosis(None, None), None);
2483    }
2484
2485    #[test]
2486    fn kurtosis_normal_distribution() {
2487        // Normal distribution should have kurtosis close to 0
2488        let mut unsorted = Unsorted::new();
2489        unsorted.extend(vec![1, 2, 3, 4, 5]);
2490        let result = unsorted.kurtosis(None, None);
2491        assert!(result.is_some());
2492        // For small samples, kurtosis can vary significantly
2493    }
2494
2495    #[test]
2496    fn kurtosis_all_same() {
2497        // All same values - variance is 0, kurtosis undefined
2498        let mut unsorted = Unsorted::new();
2499        unsorted.extend(vec![5, 5, 5, 5]);
2500        assert_eq!(unsorted.kurtosis(None, None), None);
2501    }
2502
2503    #[test]
2504    fn kurtosis_stream() {
2505        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2506        assert!(result.is_some());
2507    }
2508
2509    #[test]
2510    fn kurtosis_precalc_mean_variance() {
2511        // Test with pre-calculated mean and variance
2512        let mut unsorted = Unsorted::new();
2513        unsorted.extend(vec![1, 2, 3, 4, 5]);
2514
2515        // Calculate mean and variance manually
2516        let mean = 3.0f64;
2517        let variance = ((1.0f64 - 3.0).powi(2)
2518            + (2.0f64 - 3.0).powi(2)
2519            + (3.0f64 - 3.0).powi(2)
2520            + (4.0f64 - 3.0).powi(2)
2521            + (5.0f64 - 3.0).powi(2))
2522            / 5.0;
2523
2524        let result = unsorted.kurtosis(Some(mean), Some(variance));
2525        assert!(result.is_some());
2526
2527        // Test that pre-calculated values give same result
2528        let mut unsorted2 = Unsorted::new();
2529        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2530        let result2 = unsorted2.kurtosis(None, None);
2531        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2532    }
2533
2534    #[test]
2535    fn kurtosis_precalc_mean_only() {
2536        // Test with pre-calculated mean only
2537        let mut unsorted = Unsorted::new();
2538        unsorted.extend(vec![1, 2, 3, 4, 5]);
2539        let mean = 3.0f64;
2540
2541        let result = unsorted.kurtosis(Some(mean), None);
2542        assert!(result.is_some());
2543
2544        // Test that pre-calculated mean gives same result
2545        let mut unsorted2 = Unsorted::new();
2546        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2547        let result2 = unsorted2.kurtosis(None, None);
2548        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2549    }
2550
2551    #[test]
2552    fn kurtosis_precalc_variance_only() {
2553        // Test with pre-calculated variance only
2554        let mut unsorted = Unsorted::new();
2555        unsorted.extend(vec![1, 2, 3, 4, 5]);
2556        let variance = ((1.0f64 - 3.0).powi(2)
2557            + (2.0f64 - 3.0).powi(2)
2558            + (3.0f64 - 3.0).powi(2)
2559            + (4.0f64 - 3.0).powi(2)
2560            + (5.0f64 - 3.0).powi(2))
2561            / 5.0;
2562
2563        let result = unsorted.kurtosis(None, Some(variance));
2564        assert!(result.is_some());
2565
2566        // Test that pre-calculated variance gives same result
2567        let mut unsorted2 = Unsorted::new();
2568        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2569        let result2 = unsorted2.kurtosis(None, None);
2570        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2571    }
2572
2573    #[test]
2574    fn kurtosis_exact_calculation() {
2575        // Test with exact calculation for [1, 2, 3, 4]
2576        // Mean = 2.5
2577        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2578        // Variance^2 = 1.5625
2579        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2580        // n = 4
2581        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2582        let mut unsorted = Unsorted::new();
2583        unsorted.extend(vec![1, 2, 3, 4]);
2584        let result = unsorted.kurtosis(None, None).unwrap();
2585        // For small samples, kurtosis can be very high
2586        assert!(result.is_finite());
2587    }
2588
2589    #[test]
2590    fn kurtosis_uniform_distribution() {
2591        // Uniform distribution should have negative excess kurtosis
2592        let mut unsorted = Unsorted::new();
2593        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2594        let result = unsorted.kurtosis(None, None).unwrap();
2595        // Uniform distribution has excess kurtosis = -1.2
2596        // But for small samples, it can vary significantly
2597        assert!(result.is_finite());
2598    }
2599
2600    #[test]
2601    fn kurtosis_large_dataset() {
2602        // Test with larger dataset to exercise parallel path
2603        let data: Vec<i32> = (1..=1000).collect();
2604        let result = kurtosis(data.iter().copied(), None, None);
2605        assert!(result.is_some());
2606        let kurt_val = result.unwrap();
2607        assert!(kurt_val.is_finite());
2608    }
2609
2610    #[test]
2611    fn kurtosis_unsorted_vs_sorted() {
2612        // Test that sorting doesn't affect result
2613        let mut unsorted1 = Unsorted::new();
2614        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2615        let result1 = unsorted1.kurtosis(None, None).unwrap();
2616
2617        let mut unsorted2 = Unsorted::new();
2618        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2619        let result2 = unsorted2.kurtosis(None, None).unwrap();
2620
2621        assert!((result1 - result2).abs() < 1e-10);
2622    }
2623
2624    #[test]
2625    fn kurtosis_minimum_size() {
2626        // Test with exactly 4 elements (minimum required)
2627        let mut unsorted = Unsorted::new();
2628        unsorted.extend(vec![1, 2, 3, 4]);
2629        let result = unsorted.kurtosis(None, None);
2630        assert!(result.is_some());
2631        assert!(result.unwrap().is_finite());
2632    }
2633
2634    #[test]
2635    fn kurtosis_heavy_tailed() {
2636        // Test with heavy-tailed distribution (outliers)
2637        let mut unsorted = Unsorted::new();
2638        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2639        let result = unsorted.kurtosis(None, None).unwrap();
2640        // Heavy tails should give positive excess kurtosis
2641        assert!(result.is_finite());
2642        // With an outlier, kurtosis should be positive
2643        assert!(result > -10.0); // Allow some variance but should be reasonable
2644    }
2645
2646    #[test]
2647    fn kurtosis_light_tailed() {
2648        // Test with light-tailed distribution (values close together)
2649        let mut unsorted = Unsorted::new();
2650        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2651        let result = unsorted.kurtosis(None, None).unwrap();
2652        // Light tails might give negative excess kurtosis
2653        assert!(result.is_finite());
2654    }
2655
2656    #[test]
2657    fn kurtosis_small_variance() {
2658        // Test with very small variance (values very close together)
2659        let mut unsorted = Unsorted::new();
2660        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2661        let result = unsorted.kurtosis(None, None);
2662        // Should still compute (variance is very small but non-zero)
2663        assert!(result.is_some());
2664        assert!(result.unwrap().is_finite());
2665    }
2666
2667    #[test]
2668    fn kurtosis_precalc_zero_variance() {
2669        // Test with pre-calculated variance of zero (should return None)
2670        let mut unsorted = Unsorted::new();
2671        unsorted.extend(vec![1, 2, 3, 4, 5]);
2672        let result = unsorted.kurtosis(None, Some(0.0));
2673        assert_eq!(result, None);
2674    }
2675
2676    #[test]
2677    fn kurtosis_precalc_negative_variance() {
2678        // Test with negative variance (invalid, but should handle gracefully)
2679        let mut unsorted = Unsorted::new();
2680        unsorted.extend(vec![1, 2, 3, 4, 5]);
2681        // Negative variance is invalid, but function should handle it
2682        let result = unsorted.kurtosis(None, Some(-1.0));
2683        // Should either return None or handle it gracefully
2684        // The function computes variance_sq = variance^2, so negative becomes positive
2685        // But this is invalid input, so behavior may vary
2686        // For now, just check it doesn't panic
2687        let _ = result;
2688    }
2689
2690    #[test]
2691    fn kurtosis_different_types() {
2692        // Test with different integer types
2693        let mut unsorted_u32 = Unsorted::new();
2694        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2695        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2696
2697        let mut unsorted_i64 = Unsorted::new();
2698        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2699        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2700
2701        assert!((result_u32 - result_i64).abs() < 1e-10);
2702    }
2703
2704    #[test]
2705    fn kurtosis_floating_point_precision() {
2706        // Test floating point precision
2707        let mut unsorted = Unsorted::new();
2708        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2709        let result = unsorted.kurtosis(None, None);
2710        assert!(result.is_some());
2711        assert!(result.unwrap().is_finite());
2712    }
2713
2714    #[test]
2715    fn kurtosis_negative_values() {
2716        // Test with negative values
2717        let mut unsorted = Unsorted::new();
2718        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2719        let result = unsorted.kurtosis(None, None);
2720        assert!(result.is_some());
2721        assert!(result.unwrap().is_finite());
2722    }
2723
2724    #[test]
2725    fn kurtosis_mixed_positive_negative() {
2726        // Test with mixed positive and negative values
2727        let mut unsorted = Unsorted::new();
2728        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2729        let result = unsorted.kurtosis(None, None);
2730        assert!(result.is_some());
2731        assert!(result.unwrap().is_finite());
2732    }
2733
2734    #[test]
2735    fn kurtosis_duplicate_values() {
2736        // Test with duplicate values (but not all same)
2737        let mut unsorted = Unsorted::new();
2738        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2739        let result = unsorted.kurtosis(None, None);
2740        assert!(result.is_some());
2741        assert!(result.unwrap().is_finite());
2742    }
2743
2744    #[test]
2745    fn kurtosis_precalc_mean_wrong() {
2746        // Test that wrong pre-calculated mean gives wrong result
2747        let mut unsorted1 = Unsorted::new();
2748        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2749        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2750
2751        let mut unsorted2 = Unsorted::new();
2752        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2753        let wrong_mean = 10.0; // Wrong mean
2754        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2755
2756        // Results should be different
2757        assert!((correct_result - wrong_result).abs() > 1e-5);
2758    }
2759
2760    #[test]
2761    fn percentile_rank_empty() {
2762        let mut unsorted: Unsorted<i32> = Unsorted::new();
2763        assert_eq!(unsorted.percentile_rank(5), None);
2764        let empty_vec: Vec<i32> = vec![];
2765        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2766    }
2767
2768    #[test]
2769    fn percentile_rank_basic() {
2770        let mut unsorted = Unsorted::new();
2771        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2772
2773        // Value less than all
2774        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2775
2776        // Value greater than all
2777        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2778
2779        // Median (5) should be around 50th percentile
2780        let rank = unsorted.percentile_rank(5).unwrap();
2781        assert!((rank - 50.0).abs() < 1.0);
2782
2783        // First value should be at 10th percentile
2784        let rank = unsorted.percentile_rank(1).unwrap();
2785        assert!((rank - 10.0).abs() < 1.0);
2786    }
2787
2788    #[test]
2789    fn percentile_rank_duplicates() {
2790        let mut unsorted = Unsorted::new();
2791        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2792
2793        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2794        let rank = unsorted.percentile_rank(2).unwrap();
2795        assert!((rank - 40.0).abs() < 1.0);
2796    }
2797
2798    #[test]
2799    fn percentile_rank_stream() {
2800        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2801        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2802    }
2803
2804    #[test]
2805    fn percentile_rank_many_ties() {
2806        // 100 copies of 5 followed by 100 copies of 10 — tests O(log n) upper bound
2807        let mut unsorted = Unsorted::new();
2808        for _ in 0..100 {
2809            unsorted.add(5u32);
2810        }
2811        for _ in 0..100 {
2812            unsorted.add(10u32);
2813        }
2814        // 100 values <= 5 out of 200
2815        let rank = unsorted.percentile_rank(5).unwrap();
2816        assert!((rank - 50.0).abs() < f64::EPSILON);
2817        // All 200 values <= 10
2818        let mut unsorted2 = Unsorted::new();
2819        for _ in 0..100 {
2820            unsorted2.add(5u32);
2821        }
2822        for _ in 0..100 {
2823            unsorted2.add(10u32);
2824        }
2825        let rank = unsorted2.percentile_rank(10).unwrap();
2826        assert!((rank - 100.0).abs() < f64::EPSILON);
2827    }
2828
2829    #[test]
2830    fn atkinson_empty() {
2831        let mut unsorted: Unsorted<i32> = Unsorted::new();
2832        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2833        let empty_vec: Vec<i32> = vec![];
2834        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2835    }
2836
2837    #[test]
2838    fn atkinson_single_element() {
2839        let mut unsorted = Unsorted::new();
2840        unsorted.add(5);
2841        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2842        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2843    }
2844
2845    #[test]
2846    fn atkinson_perfect_equality() {
2847        // All values the same - perfect equality, Atkinson = 0
2848        let mut unsorted = Unsorted::new();
2849        unsorted.extend(vec![10, 10, 10, 10, 10]);
2850        let result = unsorted.atkinson(1.0, None, None).unwrap();
2851        assert!((result - 0.0).abs() < 1e-10);
2852    }
2853
2854    #[test]
2855    fn atkinson_epsilon_zero() {
2856        // Epsilon = 0 means no inequality aversion, should return 0
2857        let mut unsorted = Unsorted::new();
2858        unsorted.extend(vec![1, 2, 3, 4, 5]);
2859        let result = unsorted.atkinson(0.0, None, None).unwrap();
2860        assert!((result - 0.0).abs() < 1e-10);
2861    }
2862
2863    #[test]
2864    fn atkinson_epsilon_one() {
2865        // Epsilon = 1 uses geometric mean
2866        let mut unsorted = Unsorted::new();
2867        unsorted.extend(vec![1, 2, 3, 4, 5]);
2868        let result = unsorted.atkinson(1.0, None, None);
2869        assert!(result.is_some());
2870    }
2871
2872    #[test]
2873    fn atkinson_epsilon_one_rejects_nan() {
2874        // NaN in the data must return None, not Some(NaN), for the fused
2875        // (epsilon=1, no precalc) fast path.
2876        let mut unsorted = Unsorted::new();
2877        unsorted.extend(vec![1.0_f64, 2.0, f64::NAN, 4.0, 5.0]);
2878        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2879    }
2880
2881    #[test]
2882    fn atkinson_negative_epsilon() {
2883        let mut unsorted = Unsorted::new();
2884        unsorted.extend(vec![1, 2, 3, 4, 5]);
2885        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2886    }
2887
2888    #[test]
2889    fn atkinson_zero_mean() {
2890        // If mean is zero, Atkinson is undefined
2891        let mut unsorted = Unsorted::new();
2892        unsorted.extend(vec![0, 0, 0, 0]);
2893        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2894    }
2895
2896    #[test]
2897    fn atkinson_stream() {
2898        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2899        assert!(result.is_some());
2900    }
2901
2902    #[test]
2903    fn atkinson_precalc_mean_geometric_sum() {
2904        // Test with pre-calculated mean and geometric_sum
2905        let mut unsorted = Unsorted::new();
2906        unsorted.extend(vec![1, 2, 3, 4, 5]);
2907
2908        // Calculate mean and geometric_sum manually
2909        let mean = 3.0f64;
2910        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2911
2912        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2913        assert!(result.is_some());
2914
2915        // Test that pre-calculated values give same result
2916        let mut unsorted2 = Unsorted::new();
2917        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2918        let result2 = unsorted2.atkinson(1.0, None, None);
2919        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2920    }
2921
2922    #[test]
2923    fn atkinson_precalc_mean_only() {
2924        // Test with pre-calculated mean only
2925        let mut unsorted = Unsorted::new();
2926        unsorted.extend(vec![1, 2, 3, 4, 5]);
2927        let mean = 3.0f64;
2928
2929        let result = unsorted.atkinson(1.0, Some(mean), None);
2930        assert!(result.is_some());
2931
2932        // Test that pre-calculated mean gives same result
2933        let mut unsorted2 = Unsorted::new();
2934        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2935        let result2 = unsorted2.atkinson(1.0, None, None);
2936        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2937    }
2938
2939    #[test]
2940    fn atkinson_precalc_geometric_sum_only() {
2941        // Test with pre-calculated geometric_sum only
2942        let mut unsorted = Unsorted::new();
2943        unsorted.extend(vec![1, 2, 3, 4, 5]);
2944        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2945
2946        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2947        assert!(result.is_some());
2948
2949        // Test that pre-calculated geometric_sum gives same result
2950        let mut unsorted2 = Unsorted::new();
2951        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2952        let result2 = unsorted2.atkinson(1.0, None, None);
2953        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2954    }
2955
2956    #[test]
2957    fn test_median_with_infinity() {
2958        let mut unsorted = Unsorted::new();
2959        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2960        assert_eq!(unsorted.median(), Some(2.0));
2961    }
2962
2963    #[test]
2964    fn test_median_with_neg_infinity() {
2965        let mut unsorted = Unsorted::new();
2966        unsorted.extend(vec![f64::NEG_INFINITY, 1.0f64, 2.0]);
2967        assert_eq!(unsorted.median(), Some(1.0));
2968    }
2969
2970    #[test]
2971    fn test_quartiles_with_infinity() {
2972        let mut unsorted = Unsorted::new();
2973        unsorted.extend(vec![f64::NEG_INFINITY, 1.0, 2.0, 3.0, f64::INFINITY]);
2974        let q = unsorted.quartiles();
2975        // Q2 (median) should be 2.0
2976        assert!(q.is_some());
2977        let (_, q2, _) = q.unwrap();
2978        assert_eq!(q2, 2.0);
2979    }
2980
2981    #[test]
2982    fn test_mode_with_nan() {
2983        // NaN breaks the Ord contract via Partial<T>, so sort order is
2984        // non-deterministic. We only verify the call doesn't panic —
2985        // the exact mode value depends on where NaN lands after sorting.
2986        let mut unsorted: Unsorted<f64> = Unsorted::new();
2987        unsorted.extend(vec![1.0, f64::NAN, 2.0, 2.0, 3.0]);
2988        let _result = unsorted.mode(); // must not panic
2989    }
2990
2991    #[test]
2992    fn test_gini_with_infinity() {
2993        let mut unsorted = Unsorted::new();
2994        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2995        let g = unsorted.gini(None);
2996        // Gini with infinity in the data: the weighted_sum/sum ratio involves
2997        // Inf/Inf which is NaN, so the result is Some(NaN) — not a meaningful
2998        // Gini coefficient, but importantly does not panic
2999        assert!(g.unwrap().is_nan());
3000    }
3001
3002    #[test]
3003    fn test_cardinality_with_infinity() {
3004        let mut unsorted = Unsorted::new();
3005        unsorted.extend(vec![1.0f64, f64::INFINITY, f64::NEG_INFINITY, 1.0]);
3006        assert_eq!(unsorted.cardinality(false, 10_000), 3);
3007    }
3008}
3009
3010#[cfg(test)]
3011mod bench {
3012    use super::*;
3013    use std::time::Instant;
3014
3015    #[test]
3016    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
3017    fn comprehensive_quartiles_benchmark() {
3018        // Test a wide range of data sizes
3019        let data_sizes = vec![
3020            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
3021        ];
3022
3023        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
3024
3025        for size in data_sizes {
3026            println!("--- Testing with {} elements ---", size);
3027
3028            // Test different data patterns
3029            let test_patterns = vec![
3030                ("Random", generate_random_data(size)),
3031                ("Reverse Sorted", {
3032                    let mut v = Vec::with_capacity(size);
3033                    for x in (0..size).rev() {
3034                        v.push(x as i32);
3035                    }
3036                    v
3037                }),
3038                ("Already Sorted", {
3039                    let mut v = Vec::with_capacity(size);
3040                    for x in 0..size {
3041                        v.push(x as i32);
3042                    }
3043                    v
3044                }),
3045                ("Many Duplicates", {
3046                    // Create a vector with just a few distinct values repeated many times
3047                    let mut v = Vec::with_capacity(size);
3048                    let chunk_size = size / 100;
3049                    for i in 0..100 {
3050                        v.extend(std::iter::repeat_n(i, chunk_size));
3051                    }
3052                    // Add any remaining elements
3053                    v.extend(std::iter::repeat_n(0, size - v.len()));
3054                    v
3055                }),
3056            ];
3057
3058            for (pattern_name, test_data) in test_patterns {
3059                println!("\n  Pattern: {}", pattern_name);
3060
3061                // Benchmark sorting-based approach
3062                let mut unsorted1 = Unsorted::new();
3063                unsorted1.extend(test_data.clone());
3064
3065                let start = Instant::now();
3066                let result_sorted = unsorted1.quartiles();
3067                let sorted_time = start.elapsed();
3068
3069                // Benchmark selection-based approach (with copying)
3070                let mut unsorted2 = Unsorted::new();
3071                unsorted2.extend(test_data.clone());
3072
3073                let start = Instant::now();
3074                let result_selection = unsorted2.quartiles_with_selection();
3075                let selection_time = start.elapsed();
3076
3077                // Benchmark zero-copy selection-based approach
3078                let mut unsorted3 = Unsorted::new();
3079                unsorted3.extend(test_data);
3080
3081                let start = Instant::now();
3082                let result_zero_copy = unsorted3.quartiles_zero_copy();
3083                let zero_copy_time = start.elapsed();
3084
3085                // Verify results are the same
3086                assert_eq!(result_sorted, result_selection);
3087                assert_eq!(result_sorted, result_zero_copy);
3088
3089                let selection_speedup =
3090                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3091                let zero_copy_speedup =
3092                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3093
3094                println!("    Sorting:       {:>12?}", sorted_time);
3095                println!(
3096                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3097                    selection_time, selection_speedup
3098                );
3099                println!(
3100                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3101                    zero_copy_time, zero_copy_speedup
3102                );
3103
3104                let best_algorithm =
3105                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3106                        "ZERO-COPY"
3107                    } else if selection_speedup > 1.0 {
3108                        "SELECTION"
3109                    } else {
3110                        "SORTING"
3111                    };
3112                println!("    Best: {}", best_algorithm);
3113            }
3114
3115            println!(); // Add blank line between sizes
3116        }
3117    }
3118
3119    // Generate random data for benchmarking
3120    fn generate_random_data(size: usize) -> Vec<i32> {
3121        // Simple LCG random number generator for reproducible results
3122        let mut rng = 1234567u64;
3123        let mut vec = Vec::with_capacity(size);
3124        for _ in 0..size {
3125            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3126            vec.push((rng >> 16) as i32);
3127        }
3128        vec
3129    }
3130
3131    #[test]
3132    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3133    fn find_selection_threshold() {
3134        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3135
3136        // Binary search approach to find the threshold
3137        let mut found_threshold = None;
3138        let test_sizes = vec![
3139            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3140            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3141        ];
3142
3143        for size in test_sizes {
3144            println!("Testing size: {}", size);
3145
3146            // Use random data as it's most representative of real-world scenarios
3147            let test_data = generate_random_data(size);
3148
3149            // Run multiple iterations to get average performance
3150            let iterations = 3;
3151            let mut sorting_total = 0u128;
3152            let mut selection_total = 0u128;
3153            let mut zero_copy_total = 0u128;
3154
3155            for i in 0..iterations {
3156                println!("  Iteration {}/{}", i + 1, iterations);
3157
3158                // Sorting approach
3159                let mut unsorted1 = Unsorted::new();
3160                unsorted1.extend(test_data.clone());
3161
3162                let start = Instant::now();
3163                let _result_sorted = unsorted1.quartiles();
3164                sorting_total += start.elapsed().as_nanos();
3165
3166                // Selection approach (with copying)
3167                let mut unsorted2 = Unsorted::new();
3168                unsorted2.extend(test_data.clone());
3169
3170                let start = Instant::now();
3171                let _result_selection = unsorted2.quartiles_with_selection();
3172                selection_total += start.elapsed().as_nanos();
3173
3174                // Zero-copy selection approach
3175                let mut unsorted3 = Unsorted::new();
3176                unsorted3.extend(test_data.clone());
3177
3178                let start = Instant::now();
3179                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3180                zero_copy_total += start.elapsed().as_nanos();
3181            }
3182
3183            let avg_sorting = sorting_total / iterations as u128;
3184            let avg_selection = selection_total / iterations as u128;
3185            let avg_zero_copy = zero_copy_total / iterations as u128;
3186            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3187            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3188
3189            println!(
3190                "  Average sorting:    {:>12.2}ms",
3191                avg_sorting as f64 / 1_000_000.0
3192            );
3193            println!(
3194                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3195                avg_selection as f64 / 1_000_000.0,
3196                selection_speedup
3197            );
3198            println!(
3199                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3200                avg_zero_copy as f64 / 1_000_000.0,
3201                zero_copy_speedup
3202            );
3203
3204            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3205                found_threshold = Some(size);
3206                let best_method = if zero_copy_speedup > selection_speedup {
3207                    "Zero-copy"
3208                } else {
3209                    "Selection"
3210                };
3211                println!(
3212                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3213                    best_method, size
3214                );
3215            }
3216
3217            println!();
3218        }
3219
3220        match found_threshold {
3221            Some(threshold) => println!(
3222                "🎯 Selection algorithm becomes faster at approximately {} elements",
3223                threshold
3224            ),
3225            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3226        }
3227    }
3228
3229    #[test]
3230    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3231    fn benchmark_different_data_types() {
3232        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3233
3234        let size = 5_000_000; // Use a large size where differences might be visible
3235
3236        // Test with f64 (floating point)
3237        println!("Testing with f64 data:");
3238        let float_data: Vec<f64> = generate_random_data(size)
3239            .into_iter()
3240            .map(|x| x as f64 / 1000.0)
3241            .collect();
3242
3243        let mut unsorted1 = Unsorted::new();
3244        unsorted1.extend(float_data.clone());
3245        let start = Instant::now();
3246        let _result = unsorted1.quartiles();
3247        let sorting_time = start.elapsed();
3248
3249        let mut unsorted2 = Unsorted::new();
3250        unsorted2.extend(float_data.clone());
3251        let start = Instant::now();
3252        let _result = unsorted2.quartiles_with_selection();
3253        let selection_time = start.elapsed();
3254
3255        let mut unsorted3 = Unsorted::new();
3256        unsorted3.extend(float_data);
3257        let start = Instant::now();
3258        let _result = unsorted3.quartiles_zero_copy();
3259        let zero_copy_time = start.elapsed();
3260
3261        println!("  Sorting:    {:?}", sorting_time);
3262        println!("  Selection:  {:?}", selection_time);
3263        println!("  Zero-copy:  {:?}", zero_copy_time);
3264        println!(
3265            "  Selection Speedup:  {:.2}x",
3266            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3267        );
3268        println!(
3269            "  Zero-copy Speedup:  {:.2}x\n",
3270            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3271        );
3272
3273        // Test with i64 (larger integers)
3274        println!("Testing with i64 data:");
3275        let int64_data: Vec<i64> = generate_random_data(size)
3276            .into_iter()
3277            .map(|x| x as i64 * 1000)
3278            .collect();
3279
3280        let mut unsorted1 = Unsorted::new();
3281        unsorted1.extend(int64_data.clone());
3282        let start = Instant::now();
3283        let _result = unsorted1.quartiles();
3284        let sorting_time = start.elapsed();
3285
3286        let mut unsorted2 = Unsorted::new();
3287        unsorted2.extend(int64_data.clone());
3288        let start = Instant::now();
3289        let _result = unsorted2.quartiles_with_selection();
3290        let selection_time = start.elapsed();
3291
3292        let mut unsorted3 = Unsorted::new();
3293        unsorted3.extend(int64_data);
3294        let start = Instant::now();
3295        let _result = unsorted3.quartiles_zero_copy();
3296        let zero_copy_time = start.elapsed();
3297
3298        println!("  Sorting:    {:?}", sorting_time);
3299        println!("  Selection:  {:?}", selection_time);
3300        println!("  Zero-copy:  {:?}", zero_copy_time);
3301        println!(
3302            "  Selection Speedup:  {:.2}x",
3303            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3304        );
3305        println!(
3306            "  Zero-copy Speedup:  {:.2}x",
3307            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3308        );
3309    }
3310}