stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10// PARALLEL_THRESHOLD (10,000) is the minimum dataset size for rayon parallel sort.
11// The separate 10,240 threshold in cardinality estimation (5 × 2,048) is aligned to
12// cache-line-friendly chunk sizes for parallel iterator reduction.
13const PARALLEL_THRESHOLD: usize = 10_000;
14
15/// Compute the exact median on a stream of data.
16///
17/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
18#[inline]
19pub fn median<I>(it: I) -> Option<f64>
20where
21    I: Iterator,
22    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
23{
24    it.collect::<Unsorted<_>>().median()
25}
26
27/// Compute the median absolute deviation (MAD) on a stream of data.
28#[inline]
29pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
30where
31    I: Iterator,
32    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
33{
34    it.collect::<Unsorted<_>>().mad(precalc_median)
35}
36
37/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
38///
39/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
40#[inline]
41pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
42where
43    I: Iterator,
44    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
45{
46    it.collect::<Unsorted<_>>().quartiles()
47}
48
49/// Compute the exact mode on a stream of data.
50///
51/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
52///
53/// If the data does not have a mode, then `None` is returned.
54#[inline]
55pub fn mode<T, I>(it: I) -> Option<T>
56where
57    T: PartialOrd + Clone + Send,
58    I: Iterator<Item = T>,
59{
60    it.collect::<Unsorted<T>>().mode()
61}
62
63/// Compute the modes on a stream of data.
64///
65/// If there is a single mode, then only that value is returned in the `Vec`
66/// however, if there are multiple values tied for occurring the most amount of times
67/// those values are returned.
68///
69/// ## Example
70/// ```
71/// use stats;
72///
73/// let vals = vec![1, 1, 2, 2, 3];
74///
75/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
76/// ```
77/// This has time complexity `O(n)`
78///
79/// If the data does not have a mode, then an empty `Vec` is returned.
80#[inline]
81pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
82where
83    T: PartialOrd + Clone + Send,
84    I: Iterator<Item = T>,
85{
86    it.collect::<Unsorted<T>>().modes()
87}
88
89/// Compute the antimodes on a stream of data.
90///
91/// Antimode is the least frequent non-zero score.
92///
93/// If there is a single antimode, then only that value is returned in the `Vec`
94/// however, if there are multiple values tied for occurring the least amount of times
95/// those values are returned.
96///
97/// Only the first 10 antimodes are returned to prevent returning the whole set
98/// when cardinality = number of records (i.e. all unique values).
99///
100/// ## Example
101/// ```
102/// use stats;
103///
104/// let vals = vec![1, 1, 2, 2, 3];
105///
106/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
107/// ```
108/// This has time complexity `O(n)`
109///
110/// If the data does not have an antimode, then an empty `Vec` is returned.
111#[inline]
112pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
113where
114    T: PartialOrd + Clone + Send,
115    I: Iterator<Item = T>,
116{
117    let (antimodes_result, antimodes_count, antimodes_occurrences) =
118        it.collect::<Unsorted<T>>().antimodes();
119    (antimodes_result, antimodes_count, antimodes_occurrences)
120}
121
122/// Compute the Gini Coefficient on a stream of data.
123///
124/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
125/// to 1 (perfect inequality).
126///
127/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
128#[inline]
129pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
130where
131    I: Iterator,
132    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
133{
134    it.collect::<Unsorted<_>>().gini(precalc_sum)
135}
136
137/// Compute the kurtosis (excess kurtosis) on a stream of data.
138///
139/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
140/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
141/// negative values indicate light tails.
142///
143/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
144#[inline]
145pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
146where
147    I: Iterator,
148    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
149{
150    it.collect::<Unsorted<_>>()
151        .kurtosis(precalc_mean, precalc_variance)
152}
153
154/// Compute the percentile rank of a value on a stream of data.
155///
156/// Returns the percentile rank (0-100) of the given value in the distribution.
157/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
158///
159/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
160#[inline]
161pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
162where
163    I: Iterator,
164    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
165    V: PartialOrd + ToPrimitive,
166{
167    it.collect::<Unsorted<_>>().percentile_rank(value)
168}
169
170/// Compute the Atkinson Index on a stream of data.
171///
172/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
173/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
174/// Higher ε values give more weight to inequality at the lower end of the distribution.
175///
176/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
177#[inline]
178pub fn atkinson<I>(
179    it: I,
180    epsilon: f64,
181    precalc_mean: Option<f64>,
182    precalc_geometric_sum: Option<f64>,
183) -> Option<f64>
184where
185    I: Iterator,
186    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
187{
188    it.collect::<Unsorted<_>>()
189        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
190}
191
192fn median_on_sorted<T>(data: &[T]) -> Option<f64>
193where
194    T: PartialOrd + ToPrimitive,
195{
196    Some(match data.len() {
197        // Empty slice case - return None early
198        0 => return None,
199        // Single element case - return that element converted to f64
200        1 => data.first()?.to_f64()?,
201        // Even length case - average the two middle elements
202        len if len.is_multiple_of(2) => {
203            let idx = len / 2;
204            // Safety: we know these indices are valid because we checked len is even and non-zero,
205            // so idx-1 and idx are valid indices into data
206            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
207            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
208            f64::midpoint(v1, v2)
209        }
210        // Odd length case - return the middle element
211        // Safety: we know the index is within bounds
212        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
213    })
214}
215
216fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
217where
218    T: Sync + PartialOrd + ToPrimitive,
219{
220    if data.is_empty() {
221        return None;
222    }
223    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
224
225    // Use adaptive parallel processing based on data size
226    let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
227        // Sequential processing for small datasets
228        // Iterator collect enables TrustedLen optimization, eliminating per-element bounds checks
229        data.iter()
230            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
231            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
232            .collect()
233    } else {
234        // Parallel processing for large datasets
235        data.par_iter()
236            // SAFETY: to_f64() always returns Some for standard numeric types
237            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
238            .collect()
239    };
240
241    // Use select_nth_unstable to find the median in O(n) instead of O(n log n) full sort
242    let len = abs_diff_vec.len();
243    let mid = len / 2;
244    let cmp = |a: &f64, b: &f64| a.total_cmp(b);
245
246    abs_diff_vec.select_nth_unstable_by(mid, cmp);
247
248    if len.is_multiple_of(2) {
249        // Even length: need both mid-1 and mid elements
250        let right = abs_diff_vec[mid];
251        // The left partition [0..mid] contains elements <= abs_diff_vec[mid],
252        // so we can find the max of the left partition for mid-1
253        let left = abs_diff_vec[..mid]
254            .iter()
255            .max_by(|a, b| cmp(a, b))
256            .copied()?;
257        Some(f64::midpoint(left, right))
258    } else {
259        Some(abs_diff_vec[mid])
260    }
261}
262
263fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
264where
265    T: Sync + PartialOrd + ToPrimitive,
266{
267    let len = data.len();
268
269    // Early return for empty data
270    if len == 0 {
271        return None;
272    }
273
274    // Single element case: perfect equality, Gini = 0
275    if len == 1 {
276        return Some(0.0);
277    }
278
279    // Gini coefficient is only defined for non-negative distributions.
280    // Since data is sorted, check the first (smallest) element.
281    // SAFETY: len > 1 guaranteed by checks above
282    let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
283    if first_val < 0.0 {
284        return None;
285    }
286
287    // Compute sum and weighted sum.
288    // When precalc_sum is provided, only compute weighted_sum in a single pass.
289    // When not provided, fuse both computations into a single pass over the data
290    // to halve cache pressure (following the fold/reduce pattern used in kurtosis).
291    let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
292        if precalc < 0.0 {
293            return None;
294        }
295        // Only need weighted_sum — single pass
296        let weighted_sum = if len < PARALLEL_THRESHOLD {
297            let mut weighted_sum = 0.0;
298            for (i, x) in data.iter().enumerate() {
299                // SAFETY: to_f64() always returns Some for standard numeric types
300                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
301                weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
302            }
303            weighted_sum
304        } else {
305            data.par_iter()
306                .enumerate()
307                .map(|(i, x)| {
308                    // SAFETY: to_f64() always returns Some for standard numeric types
309                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
310                    (i + 1) as f64 * val
311                })
312                .sum()
313        };
314        (precalc, weighted_sum)
315    } else if len < PARALLEL_THRESHOLD {
316        // Fused single pass: compute both sum and weighted_sum together
317        let mut sum = 0.0;
318        let mut weighted_sum = 0.0;
319        for (i, x) in data.iter().enumerate() {
320            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
321            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
322            sum += val;
323            weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
324        }
325        (sum, weighted_sum)
326    } else {
327        // Fused parallel single pass using fold/reduce
328        data.par_iter()
329            .enumerate()
330            .fold(
331                || (0.0_f64, 0.0_f64),
332                |acc, (i, x)| {
333                    // SAFETY: to_f64() always returns Some for standard numeric types
334                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
335                    (acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
336                },
337            )
338            .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
339    };
340
341    // If sum is zero, Gini is undefined
342    if sum == 0.0 {
343        return None;
344    }
345
346    // Compute Gini coefficient using the formula:
347    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
348    // where i is 1-indexed rank and y_i are sorted values
349    let n = len as f64;
350    let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
351
352    Some(gini)
353}
354
355fn kurtosis_on_sorted<T>(
356    data: &[Partial<T>],
357    precalc_mean: Option<f64>,
358    precalc_variance: Option<f64>,
359) -> Option<f64>
360where
361    T: Sync + PartialOrd + ToPrimitive,
362{
363    let len = data.len();
364
365    // Need at least 4 elements for meaningful kurtosis
366    if len < 4 {
367        return None;
368    }
369
370    // Use pre-calculated mean if provided, otherwise compute it
371    let mean = if let Some(precalc) = precalc_mean {
372        precalc
373    } else {
374        let sum: f64 = if len < PARALLEL_THRESHOLD {
375            // Iterator sum enables auto-vectorization (SIMD) by the compiler
376            data.iter()
377                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
378                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
379                .sum()
380        } else {
381            data.par_iter()
382                // SAFETY: to_f64() always returns Some for standard numeric types
383                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
384                .sum()
385        };
386        sum / len as f64
387    };
388
389    // Compute variance_sq and fourth_power_sum
390    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
391    // Otherwise, we need to compute variance from the data
392    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
393        // Negative variance is invalid (possible floating-point rounding artifact)
394        if variance < 0.0 {
395            return None;
396        }
397        // Use pre-calculated variance: variance_sq = variance^2
398        let variance_sq = variance * variance;
399
400        // Still need to compute fourth_power_sum
401        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
402            let mut sum = 0.0;
403            for x in data {
404                // SAFETY: to_f64() always returns Some for standard numeric types
405                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
406                let diff = val - mean;
407                let diff_sq = diff * diff;
408                sum = diff_sq.mul_add(diff_sq, sum);
409            }
410            sum
411        } else {
412            data.par_iter()
413                .map(|x| {
414                    // SAFETY: to_f64() always returns Some for standard numeric types
415                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
416                    let diff = val - mean;
417                    let diff_sq = diff * diff;
418                    diff_sq * diff_sq
419                })
420                .sum()
421        };
422
423        (variance_sq, fourth_power_sum)
424    } else {
425        // Compute both variance_sum and fourth_power_sum
426        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
427            let mut variance_sum = 0.0;
428            let mut fourth_power_sum = 0.0;
429
430            for x in data {
431                // SAFETY: to_f64() always returns Some for standard numeric types
432                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
433                let diff = val - mean;
434                let diff_sq = diff * diff;
435                variance_sum += diff_sq;
436                fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
437            }
438
439            (variance_sum, fourth_power_sum)
440        } else {
441            // Single pass computing both sums simultaneously
442            data.par_iter()
443                .fold(
444                    || (0.0_f64, 0.0_f64),
445                    |acc, x| {
446                        // SAFETY: to_f64() always returns Some for standard numeric types
447                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
448                        let diff = val - mean;
449                        let diff_sq = diff * diff;
450                        (acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
451                    },
452                )
453                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
454        };
455
456        let variance = variance_sum / len as f64;
457
458        // If variance is zero, all values are the same, kurtosis is undefined
459        if variance == 0.0 {
460            return None;
461        }
462
463        let variance_sq = variance * variance;
464        (variance_sq, fourth_power_sum)
465    };
466
467    // If variance_sq is zero, all values are the same, kurtosis is undefined
468    if variance_sq == 0.0 {
469        return None;
470    }
471
472    let n = len as f64;
473
474    // Sample excess kurtosis formula:
475    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
476    let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
477    let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
478    let kurtosis = (n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
479
480    Some(kurtosis)
481}
482
483fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
484where
485    T: PartialOrd + ToPrimitive,
486    V: PartialOrd + ToPrimitive,
487{
488    let len = data.len();
489
490    if len == 0 {
491        return None;
492    }
493
494    let value_f64 = value.to_f64()?;
495
496    // Binary search to find the position where value would be inserted
497    // This gives us the number of values <= value
498    let count_leq = data.binary_search_by(|x| {
499        x.0.to_f64()
500            .unwrap_or(f64::NAN)
501            .partial_cmp(&value_f64)
502            .unwrap_or(std::cmp::Ordering::Less)
503    });
504
505    let count = match count_leq {
506        Ok(idx) => {
507            // Value found at idx — use partition_point (O(log n)) to find the upper bound
508            // of equal values instead of a linear scan
509            let upper = data[idx + 1..].partition_point(|x| {
510                x.0.to_f64()
511                    .is_some_and(|v| v.total_cmp(&value_f64).is_le())
512            });
513            idx + 1 + upper
514        }
515        Err(idx) => idx, // Number of values less than value
516    };
517
518    // Percentile rank = (count / n) * 100
519    Some((count as f64 / len as f64) * 100.0)
520}
521
522fn atkinson_on_sorted<T>(
523    data: &[Partial<T>],
524    epsilon: f64,
525    precalc_mean: Option<f64>,
526    precalc_geometric_sum: Option<f64>,
527) -> Option<f64>
528where
529    T: Sync + PartialOrd + ToPrimitive,
530{
531    let len = data.len();
532
533    // Early return for empty data
534    if len == 0 {
535        return None;
536    }
537
538    // Single element case: perfect equality, Atkinson = 0
539    if len == 1 {
540        return Some(0.0);
541    }
542
543    // Epsilon must be non-negative
544    if epsilon < 0.0 {
545        return None;
546    }
547
548    // Use pre-calculated mean if provided, otherwise compute it
549    let mean = if let Some(precalc) = precalc_mean {
550        precalc
551    } else {
552        let sum: f64 = if len < PARALLEL_THRESHOLD {
553            // Iterator sum enables auto-vectorization (SIMD) by the compiler
554            data.iter()
555                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
556                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
557                .sum()
558        } else {
559            data.par_iter()
560                // SAFETY: to_f64() always returns Some for standard numeric types
561                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
562                .sum()
563        };
564        sum / len as f64
565    };
566
567    // If mean is zero, Atkinson is undefined
568    if mean == 0.0 {
569        return None;
570    }
571
572    // Handle special case: epsilon = 1 (uses geometric mean)
573    if (epsilon - 1.0).abs() < 1e-10 {
574        // A_1 = 1 - (geometric_mean / mean)
575        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
576            precalc
577        } else if len < PARALLEL_THRESHOLD {
578            let mut sum = 0.0;
579            for x in data {
580                // SAFETY: to_f64() always returns Some for standard numeric types
581                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
582                if val <= 0.0 {
583                    // Geometric mean undefined for non-positive values
584                    return None;
585                }
586                sum += val.ln();
587            }
588            sum
589        } else {
590            data.par_iter()
591                .map(|x| {
592                    // SAFETY: to_f64() always returns Some for standard numeric types
593                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
594                    if val <= 0.0 {
595                        return f64::NAN;
596                    }
597                    val.ln()
598                })
599                .sum()
600        };
601
602        if geometric_sum.is_nan() {
603            return None;
604        }
605
606        let geometric_mean = (geometric_sum / len as f64).exp();
607        return Some(1.0 - geometric_mean / mean);
608    }
609
610    // General case: epsilon != 1
611    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
612    let exponent = 1.0 - epsilon;
613
614    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
615        let mut sum = 0.0;
616        for x in data {
617            // SAFETY: to_f64() always returns Some for standard numeric types
618            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
619            if val < 0.0 {
620                // Negative values with non-integer exponent are undefined
621                return None;
622            }
623            let ratio = val / mean;
624            sum += ratio.powf(exponent);
625        }
626        sum
627    } else {
628        data.par_iter()
629            .map(|x| {
630                // SAFETY: to_f64() always returns Some for standard numeric types
631                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
632                if val < 0.0 {
633                    return f64::NAN;
634                }
635                let ratio = val / mean;
636                ratio.powf(exponent)
637            })
638            .sum()
639    };
640
641    if sum_powered.is_nan() || sum_powered <= 0.0 {
642        return None;
643    }
644
645    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
646    Some(atkinson)
647}
648
649/// Selection algorithm to find the k-th smallest element in O(n) average time.
650/// This is an implementation of quickselect algorithm.
651#[cfg(test)]
652fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
653where
654    T: PartialOrd,
655{
656    if data.is_empty() || k >= data.len() {
657        return None;
658    }
659
660    let mut left = 0;
661    let mut right = data.len() - 1;
662
663    loop {
664        if left == right {
665            return Some(&data[left].0);
666        }
667
668        // Use median-of-three pivot selection for better performance
669        let pivot_idx = median_of_three_pivot(data, left, right);
670        let pivot_idx = partition(data, left, right, pivot_idx);
671
672        match k.cmp(&pivot_idx) {
673            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
674            std::cmp::Ordering::Less => right = pivot_idx - 1,
675            std::cmp::Ordering::Greater => left = pivot_idx + 1,
676        }
677    }
678}
679
680/// Select the median of three elements as pivot for better quickselect performance
681#[cfg(test)]
682fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
683where
684    T: PartialOrd,
685{
686    let mid = left + (right - left) / 2;
687
688    if data[left] <= data[mid] {
689        if data[mid] <= data[right] {
690            mid
691        } else if data[left] <= data[right] {
692            right
693        } else {
694            left
695        }
696    } else if data[left] <= data[right] {
697        left
698    } else if data[mid] <= data[right] {
699        right
700    } else {
701        mid
702    }
703}
704
705/// Partition function for quickselect
706#[cfg(test)]
707fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
708where
709    T: PartialOrd,
710{
711    // Move pivot to end
712    data.swap(pivot_idx, right);
713    let mut store_idx = left;
714
715    // Move all elements smaller than pivot to the left
716    // Cache pivot position for better cache locality (access data[right] directly each time)
717    for i in left..right {
718        // Safety: i, store_idx, and right are guaranteed to be in bounds
719        // Compare directly with pivot at data[right] - compiler should optimize this access
720        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
721            data.swap(i, store_idx);
722            store_idx += 1;
723        }
724    }
725
726    // Move pivot to its final place
727    data.swap(store_idx, right);
728    store_idx
729}
730
731// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
732// It divides data into quarters based on the length n = 4k + r where r is remainder.
733// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
734fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
735where
736    T: PartialOrd + ToPrimitive,
737{
738    let len = data.len();
739
740    // Early return for small arrays
741    match len {
742        0..=2 => return None,
743        3 => {
744            return Some(
745                // SAFETY: We know these indices are valid because len == 3
746                unsafe {
747                    (
748                        data.get_unchecked(0).0.to_f64()?,
749                        data.get_unchecked(1).0.to_f64()?,
750                        data.get_unchecked(2).0.to_f64()?,
751                    )
752                },
753            );
754        }
755        _ => {}
756    }
757
758    // Calculate k and remainder in one division
759    let k = len / 4;
760    let remainder = len % 4;
761
762    // SAFETY: All index calculations below are guaranteed to be in bounds
763    // because we've verified len >= 4 above and k is len/4
764    unsafe {
765        Some(match remainder {
766            0 => {
767                // Let data = {x_i}_{i=0..4k} where k is positive integer.
768                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
769                // If we divide data into two parts {x_i < q2} as L and
770                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
771                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
772                // =============
773                // Simply put: Length is multiple of 4 (4k)
774                // q1 = (x_{k-1} + x_k) / 2
775                // q2 = (x_{2k-1} + x_{2k}) / 2
776                // q3 = (x_{3k-1} + x_{3k}) / 2
777                let q1 = f64::midpoint(
778                    data.get_unchecked(k - 1).0.to_f64()?,
779                    data.get_unchecked(k).0.to_f64()?,
780                );
781                let q2 = f64::midpoint(
782                    data.get_unchecked(2 * k - 1).0.to_f64()?,
783                    data.get_unchecked(2 * k).0.to_f64()?,
784                );
785                let q3 = f64::midpoint(
786                    data.get_unchecked(3 * k - 1).0.to_f64()?,
787                    data.get_unchecked(3 * k).0.to_f64()?,
788                );
789                (q1, q2, q3)
790            }
791            1 => {
792                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
793                // Median q2 = x_{2k}.
794                // If we divide data other than q2 into two parts {x_i < q2}
795                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
796                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
797                // =============
798                // Simply put: Length is 4k + 1
799                // q1 = (x_{k-1} + x_k) / 2
800                // q2 = x_{2k}
801                // q3 = (x_{3k} + x_{3k+1}) / 2
802                let q1 = f64::midpoint(
803                    data.get_unchecked(k - 1).0.to_f64()?,
804                    data.get_unchecked(k).0.to_f64()?,
805                );
806                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
807                let q3 = f64::midpoint(
808                    data.get_unchecked(3 * k).0.to_f64()?,
809                    data.get_unchecked(3 * k + 1).0.to_f64()?,
810                );
811                (q1, q2, q3)
812            }
813            2 => {
814                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
815                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
816                // If we divide data into two parts {x_i < q2} as L and
817                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
818                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
819                // =============
820                // Simply put: Length is 4k + 2
821                // q1 = x_k
822                // q2 = (x_{2k} + x_{2k+1}) / 2
823                // q3 = x_{3k+1}
824                let q1 = data.get_unchecked(k).0.to_f64()?;
825                let q2 = f64::midpoint(
826                    data.get_unchecked(2 * k).0.to_f64()?,
827                    data.get_unchecked(2 * k + 1).0.to_f64()?,
828                );
829                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
830                (q1, q2, q3)
831            }
832            _ => {
833                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
834                // Median q2 = x_{2k+1}.
835                // If we divide data other than q2 into two parts {x_i < q2}
836                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
837                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
838                // =============
839                // Simply put: Length is 4k + 3
840                // q1 = x_k
841                // q2 = x_{2k+1}
842                // q3 = x_{3k+2}
843                let q1 = data.get_unchecked(k).0.to_f64()?;
844                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
845                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
846                (q1, q2, q3)
847            }
848        })
849    }
850}
851
852/// Zero-copy quartiles computation using index-based selection.
853/// This avoids copying data by working with an array of indices.
854///
855/// Uses `select_nth_unstable_by` on the indices array, which partitions in-place.
856/// After selecting position p, elements at indices [0..p] are <= the p-th element
857/// and elements at [p+1..] are >= it. By selecting positions in ascending order,
858/// each subsequent selection only needs to search within the right partition,
859/// avoiding redundant O(n) resets.
860fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
861where
862    T: PartialOrd + ToPrimitive,
863{
864    let len = data.len();
865
866    // Early return for small arrays
867    match len {
868        0..=2 => return None,
869        3 => {
870            let mut indices: Vec<usize> = (0..3).collect();
871            let cmp =
872                |a: &usize, b: &usize| data[*a].partial_cmp(&data[*b]).unwrap_or(std::cmp::Ordering::Less);
873            indices.sort_unstable_by(cmp);
874            let min_val = data[indices[0]].0.to_f64()?;
875            let med_val = data[indices[1]].0.to_f64()?;
876            let max_val = data[indices[2]].0.to_f64()?;
877            return Some((min_val, med_val, max_val));
878        }
879        _ => {}
880    }
881
882    let k = len / 4;
883    let remainder = len % 4;
884
885    let mut indices: Vec<usize> = (0..len).collect();
886    let cmp = |a: &usize, b: &usize| {
887        data[*a]
888            .partial_cmp(&data[*b])
889            .unwrap_or(std::cmp::Ordering::Less)
890    };
891
892    // Collect the unique positions we need in ascending order.
893    // By selecting in ascending order, each select_nth_unstable_by partitions
894    // the array so subsequent selections operate on progressively smaller slices.
895    // We deduplicate because adjacent quartile boundaries can overlap for small k.
896    let raw_positions: Vec<usize> = match remainder {
897        0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
898        1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
899        2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
900        _ => vec![k, 2 * k + 1, 3 * k + 2],
901    };
902
903    let mut unique_positions = raw_positions.clone();
904    unique_positions.dedup();
905
906    // Select each unique position in ascending order, narrowing the search range
907    let mut start = 0;
908    for &pos in &unique_positions {
909        indices[start..].select_nth_unstable_by(pos - start, &cmp);
910        start = pos + 1;
911    }
912
913    // Now read all needed values (including duplicates) from the partitioned indices
914    let values: Vec<f64> = raw_positions
915        .iter()
916        .map(|&pos| data[indices[pos]].0.to_f64())
917        .collect::<Option<Vec<_>>>()?;
918
919    match remainder {
920        0 => {
921            let q1 = f64::midpoint(values[0], values[1]);
922            let q2 = f64::midpoint(values[2], values[3]);
923            let q3 = f64::midpoint(values[4], values[5]);
924            Some((q1, q2, q3))
925        }
926        1 => {
927            let q1 = f64::midpoint(values[0], values[1]);
928            let q2 = values[2];
929            let q3 = f64::midpoint(values[3], values[4]);
930            Some((q1, q2, q3))
931        }
932        2 => {
933            let q1 = values[0];
934            let q2 = f64::midpoint(values[1], values[2]);
935            let q3 = values[3];
936            Some((q1, q2, q3))
937        }
938        _ => Some((values[0], values[1], values[2])),
939    }
940}
941
942fn mode_on_sorted<T, I>(it: I) -> Option<T>
943where
944    T: PartialOrd,
945    I: Iterator<Item = T>,
946{
947    use std::cmp::Ordering;
948
949    // This approach to computing the mode works very nicely when the
950    // number of samples is large and is close to its cardinality.
951    // In other cases, a hashmap would be much better.
952    // But really, how can we know this when given an arbitrary stream?
953    // Might just switch to a hashmap to track frequencies. That would also
954    // be generally useful for discovering the cardinality of a sample.
955    let (mut mode, mut next) = (None, None);
956    let (mut mode_count, mut next_count) = (0usize, 0usize);
957    for x in it {
958        if mode.as_ref() == Some(&x) {
959            mode_count += 1;
960        } else if next.as_ref() == Some(&x) {
961            next_count += 1;
962        } else {
963            next = Some(x);
964            next_count = 0;
965        }
966
967        match next_count.cmp(&mode_count) {
968            Ordering::Greater => {
969                mode = next;
970                mode_count = next_count;
971                next = None;
972                next_count = 0;
973            }
974            Ordering::Equal => {
975                mode = None;
976                mode_count = 0;
977            }
978            Ordering::Less => {}
979        }
980    }
981    mode
982}
983
984/// Computes both modes and antimodes from a sorted slice of values.
985/// This version works with references to avoid unnecessary cloning.
986///
987/// # Arguments
988///
989/// * `data` - A sorted slice of values
990///
991/// # Notes
992///
993/// - Mode is the most frequently occurring value(s)
994/// - Antimode is the least frequently occurring value(s)
995/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
996/// - For empty slices, returns empty vectors and zero counts
997/// - For single value slices, returns that value as the mode and empty antimode
998/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
999///
1000/// # Returns
1001///
1002/// A tuple containing:
1003/// * Modes information: `(Vec<T>, usize, u32)` where:
1004///   - Vec<T>: Vector containing the mode values
1005///   - usize: Number of modes found
1006///   - u32: Frequency/count of the mode values
1007/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1008///   - Vec<T>: Vector containing up to 10 antimode values
1009///   - usize: Total number of antimodes
1010///   - u32: Frequency/count of the antimode values
1011#[allow(clippy::type_complexity)]
1012#[inline]
1013fn modes_and_antimodes_on_sorted_slice<T>(
1014    data: &[Partial<T>],
1015) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1016where
1017    T: PartialOrd + Clone,
1018{
1019    let size = data.len();
1020
1021    // Early return for empty slice
1022    if size == 0 {
1023        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1024    }
1025
1026    // Estimate capacity using integer square root of size
1027    // Integer square root using Newton's method (faster than floating point sqrt)
1028    let mut x = size;
1029    let mut y = x.div_ceil(2);
1030    while y < x {
1031        x = y;
1032        y = usize::midpoint(x, size / x);
1033    }
1034    let sqrt_size = x;
1035    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1036
1037    let mut current_value = &data[0].0;
1038    let mut current_count = 1;
1039    let mut highest_count = 1;
1040    let mut lowest_count = u32::MAX;
1041
1042    // Count consecutive runs - optimized to reduce allocations
1043    for x in data.iter().skip(1) {
1044        if x.0 == *current_value {
1045            current_count += 1;
1046            highest_count = highest_count.max(current_count);
1047        } else {
1048            runs.push((current_value, current_count));
1049            lowest_count = lowest_count.min(current_count);
1050            current_value = &x.0;
1051            current_count = 1;
1052        }
1053    }
1054    runs.push((current_value, current_count));
1055    lowest_count = lowest_count.min(current_count);
1056
1057    // Early return if only one unique value
1058    if runs.len() == 1 {
1059        let (val, count) = runs.pop().unwrap();
1060        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1061    }
1062
1063    // Special case: if all values appear exactly once
1064    if highest_count == 1 {
1065        let antimodes_count = runs.len().min(10);
1066        let total_count = runs.len();
1067        let mut antimodes = Vec::with_capacity(antimodes_count);
1068        for (val, _) in runs.into_iter().take(antimodes_count) {
1069            antimodes.push(val.clone());
1070        }
1071        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1072        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1073    }
1074
1075    // Collect modes and antimodes directly in a single pass, cloning values immediately
1076    // instead of collecting indices first and then cloning in a second pass
1077    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1078    let estimated_antimodes = 10.min(runs.len());
1079
1080    let mut modes_result = Vec::with_capacity(estimated_modes);
1081    let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
1082    let mut mode_count = 0;
1083    let mut antimodes_count = 0;
1084    let mut antimodes_collected = 0_u32;
1085
1086    for (val, count) in &runs {
1087        if *count == highest_count {
1088            modes_result.push((*val).clone());
1089            mode_count += 1;
1090        }
1091        if *count == lowest_count {
1092            antimodes_count += 1;
1093            if antimodes_collected < 10 {
1094                antimodes_result.push((*val).clone());
1095                antimodes_collected += 1;
1096            }
1097        }
1098    }
1099
1100    (
1101        (modes_result, mode_count, highest_count),
1102        (antimodes_result, antimodes_count, lowest_count),
1103    )
1104}
1105
1106/// A commutative data structure for lazily sorted sequences of data.
1107///
1108/// The sort does not occur until statistics need to be computed.
1109///
1110/// Note that this works on types that do not define a total ordering like
1111/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1112/// is returned.
1113#[allow(clippy::unsafe_derive_deserialize)]
1114#[derive(Clone, Serialize, Deserialize)]
1115pub struct Unsorted<T> {
1116    /// Internal cache flag indicating whether `data` is currently sorted.
1117    /// This field is skipped during serialization and deserialization.
1118    #[serde(skip)]
1119    sorted: bool,
1120    data: Vec<Partial<T>>,
1121}
1122
1123// Manual PartialEq/Eq: ignore `sorted` cache flag so equality reflects
1124// logical contents only (two Unsorted with same data compare equal
1125// regardless of whether one has been sorted).
1126impl<T: PartialEq> PartialEq for Unsorted<T> {
1127    fn eq(&self, other: &Self) -> bool {
1128        self.data == other.data
1129    }
1130}
1131
1132impl<T: PartialEq> Eq for Unsorted<T> where Partial<T>: Eq {}
1133
1134impl<T: PartialOrd + Send> Unsorted<T> {
1135    /// Create initial empty state.
1136    #[inline]
1137    #[must_use]
1138    pub fn new() -> Unsorted<T> {
1139        Default::default()
1140    }
1141
1142    /// Add a new element to the set.
1143    #[allow(clippy::inline_always)]
1144    #[inline(always)]
1145    pub fn add(&mut self, v: T) {
1146        self.sorted = false;
1147        self.data.push(Partial(v));
1148    }
1149
1150    /// Return the number of data points.
1151    #[inline]
1152    #[must_use]
1153    pub const fn len(&self) -> usize {
1154        self.data.len()
1155    }
1156
1157    #[inline]
1158    #[must_use]
1159    pub const fn is_empty(&self) -> bool {
1160        self.data.is_empty()
1161    }
1162
1163    #[inline]
1164    fn sort(&mut self) {
1165        if !self.sorted {
1166            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1167            if self.data.len() < PARALLEL_THRESHOLD {
1168                self.data.sort_unstable();
1169            } else {
1170                self.data.par_sort_unstable();
1171            }
1172            self.sorted = true;
1173        }
1174    }
1175
1176    #[inline]
1177    const fn already_sorted(&mut self) {
1178        self.sorted = true;
1179    }
1180
1181    /// Add multiple elements efficiently
1182    #[inline]
1183    pub fn add_bulk(&mut self, values: Vec<T>) {
1184        self.sorted = false;
1185        self.data.reserve(values.len());
1186        self.data.extend(values.into_iter().map(Partial));
1187    }
1188
1189    /// Shrink capacity to fit current data
1190    #[inline]
1191    pub fn shrink_to_fit(&mut self) {
1192        self.data.shrink_to_fit();
1193    }
1194
1195    /// Create with specific capacity
1196    #[inline]
1197    #[must_use]
1198    pub fn with_capacity(capacity: usize) -> Self {
1199        Unsorted {
1200            sorted: true,
1201            data: Vec::with_capacity(capacity),
1202        }
1203    }
1204
1205    /// Add a value assuming it's greater than all existing values
1206    #[inline]
1207    pub fn push_ascending(&mut self, value: T) {
1208        if let Some(last) = self.data.last() {
1209            debug_assert!(last.0 <= value, "Value must be >= than last element");
1210        }
1211        self.data.push(Partial(value));
1212        // Data remains sorted
1213    }
1214}
1215
1216impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
1217    #[inline]
1218    /// Returns the cardinality of the data.
1219    /// Set `sorted` to `true` if the data is already sorted.
1220    /// Set `parallel_threshold` to `0` to force sequential processing.
1221    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1222    /// Set `parallel_threshold` to `2` to force parallel processing.
1223    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1224    /// greater than the default threshold of `10_000`.
1225    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1226        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1227        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1228
1229        let len = self.data.len();
1230        match len {
1231            0 => return 0,
1232            1 => return 1,
1233            _ => {}
1234        }
1235
1236        if sorted {
1237            self.already_sorted();
1238        } else {
1239            self.sort();
1240        }
1241
1242        let use_parallel = parallel_threshold != 0
1243            && (parallel_threshold == 1
1244                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1245
1246        if use_parallel {
1247            // Parallel processing using chunks
1248            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1249            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1250            let chunk_info: ChunkInfo<'_, T> = self
1251                .data
1252                .par_chunks(CHUNK_SIZE)
1253                .map(|chunk| {
1254                    // Count unique elements within this chunk
1255                    let mut count = u64::from(!chunk.is_empty());
1256                    for [a, b] in chunk.array_windows::<2>() {
1257                        if a != b {
1258                            count += 1;
1259                        }
1260                    }
1261                    (count, chunk.first(), chunk.last())
1262                })
1263                .collect();
1264
1265            // Combine results, checking boundaries between chunks
1266            let mut total = 0;
1267            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1268                total += count;
1269
1270                // Check boundary with previous chunk
1271                if i > 0
1272                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1273                    && prev_last == curr_first
1274                {
1275                    total -= 1; // Deduct 1 if boundary values are equal
1276                }
1277            }
1278
1279            total
1280        } else {
1281            // Sequential processing
1282
1283            // the statement below is equivalent to:
1284            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1285            let mut count = u64::from(!self.data.is_empty());
1286
1287            for [a, b] in self.data.array_windows::<2>() {
1288                if a != b {
1289                    count += 1;
1290                }
1291            }
1292            count
1293        }
1294    }
1295}
1296
1297impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1298    /// Returns the mode of the data.
1299    #[inline]
1300    pub fn mode(&mut self) -> Option<T> {
1301        if self.data.is_empty() {
1302            return None;
1303        }
1304        self.sort();
1305        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1306    }
1307
1308    /// Returns the modes of the data.
1309    /// Note that there is also a `frequency::mode()` function that return one mode
1310    /// with the highest frequency. If there is a tie, it returns None.
1311    #[inline]
1312    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1313        if self.data.is_empty() {
1314            return (Vec::new(), 0, 0);
1315        }
1316        self.sort();
1317        modes_and_antimodes_on_sorted_slice(&self.data).0
1318    }
1319
1320    /// Returns the antimodes of the data.
1321    /// `antimodes_result` only returns the first 10 antimodes
1322    #[inline]
1323    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1324        if self.data.is_empty() {
1325            return (Vec::new(), 0, 0);
1326        }
1327        self.sort();
1328        modes_and_antimodes_on_sorted_slice(&self.data).1
1329    }
1330
1331    /// Returns the modes and antimodes of the data.
1332    /// `antimodes_result` only returns the first 10 antimodes
1333    #[allow(clippy::type_complexity)]
1334    #[inline]
1335    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1336        if self.data.is_empty() {
1337            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1338        }
1339        self.sort();
1340        modes_and_antimodes_on_sorted_slice(&self.data)
1341    }
1342}
1343
1344impl Unsorted<Vec<u8>> {
1345    /// Add a byte slice, converting to `Vec<u8>` internally.
1346    ///
1347    /// This is a convenience method that avoids requiring the caller to call
1348    /// `.to_vec()` before `add()`. The allocation still occurs internally,
1349    /// but the API is cleaner and opens the door for future optimizations
1350    /// (e.g., frequency-map deduplication for high-cardinality data).
1351    #[allow(clippy::inline_always)]
1352    #[inline(always)]
1353    pub fn add_bytes(&mut self, v: &[u8]) {
1354        self.sorted = false;
1355        self.data.push(Partial(v.to_vec()));
1356    }
1357}
1358
1359impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1360    /// Returns the median of the data.
1361    #[inline]
1362    pub fn median(&mut self) -> Option<f64> {
1363        if self.data.is_empty() {
1364            return None;
1365        }
1366        self.sort();
1367        median_on_sorted(&self.data)
1368    }
1369}
1370
1371impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1372    /// Returns the Median Absolute Deviation (MAD) of the data.
1373    #[inline]
1374    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1375        if self.data.is_empty() {
1376            return None;
1377        }
1378        if existing_median.is_none() {
1379            self.sort();
1380        }
1381        mad_on_sorted(&self.data, existing_median)
1382    }
1383}
1384
1385impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1386    /// Returns the quartiles of the data using the traditional sorting approach.
1387    ///
1388    /// This method sorts the data first and then computes quartiles.
1389    /// Time complexity: O(n log n)
1390    #[inline]
1391    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1392        if self.data.is_empty() {
1393            return None;
1394        }
1395        self.sort();
1396        quartiles_on_sorted(&self.data)
1397    }
1398}
1399
1400impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1401    /// Returns the Gini Coefficient of the data.
1402    ///
1403    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1404    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1405    /// Time complexity: O(n log n)
1406    #[inline]
1407    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1408        if self.data.is_empty() {
1409            return None;
1410        }
1411        self.sort();
1412        gini_on_sorted(&self.data, precalc_sum)
1413    }
1414
1415    /// Returns the kurtosis (excess kurtosis) of the data.
1416    ///
1417    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1418    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1419    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1420    /// Time complexity: O(n log n)
1421    #[inline]
1422    pub fn kurtosis(
1423        &mut self,
1424        precalc_mean: Option<f64>,
1425        precalc_variance: Option<f64>,
1426    ) -> Option<f64> {
1427        if self.data.is_empty() {
1428            return None;
1429        }
1430        self.sort();
1431        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1432    }
1433
1434    /// Returns the percentile rank of a value in the data.
1435    ///
1436    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1437    /// values, returns 0.0. If greater than all, returns 100.0.
1438    /// This method sorts the data first and then computes the percentile rank.
1439    /// Time complexity: O(n log n)
1440    #[inline]
1441    #[allow(clippy::needless_pass_by_value)]
1442    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1443    where
1444        V: PartialOrd + ToPrimitive,
1445    {
1446        if self.data.is_empty() {
1447            return None;
1448        }
1449        self.sort();
1450        percentile_rank_on_sorted(&self.data, &value)
1451    }
1452
1453    /// Returns the Atkinson Index of the data.
1454    ///
1455    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1456    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1457    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1458    /// This method sorts the data first and then computes the Atkinson index.
1459    /// Time complexity: O(n log n)
1460    ///
1461    /// # Arguments
1462    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1463    ///   - 0.0: No inequality aversion (returns 0)
1464    ///   - 0.5: Moderate aversion
1465    ///   - 1.0: Uses geometric mean (special case)
1466    ///   - 2.0: High aversion
1467    /// * `precalc_mean` - Optional pre-calculated mean
1468    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1469    #[inline]
1470    pub fn atkinson(
1471        &mut self,
1472        epsilon: f64,
1473        precalc_mean: Option<f64>,
1474        precalc_geometric_sum: Option<f64>,
1475    ) -> Option<f64> {
1476        if self.data.is_empty() {
1477            return None;
1478        }
1479        self.sort();
1480        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1481    }
1482}
1483
1484impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
1485    /// Returns the quartiles of the data using selection algorithm.
1486    ///
1487    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1488    /// in O(n) average time complexity instead of O(n log n) sorting.
1489    /// Requires T to implement Clone to create a working copy of the data.
1490    ///
1491    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1492    /// is often slower than the sorting-based approach for small to medium datasets due to:
1493    /// - Need to find multiple order statistics (3 separate quickselect calls)
1494    /// - Overhead of copying data to avoid mutation
1495    /// - Rayon's highly optimized parallel sorting
1496    #[inline]
1497    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1498        if self.data.is_empty() {
1499            return None;
1500        }
1501        // Use zero-copy approach (indices-based) to avoid cloning all elements
1502        quartiles_with_zero_copy_selection(&self.data)
1503    }
1504}
1505
1506impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1507    /// Returns the quartiles using zero-copy selection algorithm.
1508    ///
1509    /// This implementation avoids copying data by working with indices instead,
1510    /// providing better performance than the clone-based selection approach.
1511    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1512    #[inline]
1513    #[must_use]
1514    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1515        if self.data.is_empty() {
1516            return None;
1517        }
1518        quartiles_with_zero_copy_selection(&self.data)
1519    }
1520}
1521
1522impl<T: PartialOrd + Send> Commute for Unsorted<T> {
1523    #[inline]
1524    fn merge(&mut self, mut v: Unsorted<T>) {
1525        if v.is_empty() {
1526            return;
1527        }
1528
1529        self.sorted = false;
1530        // we use std::mem::take to avoid unnecessary allocations
1531        self.data.extend(std::mem::take(&mut v.data));
1532    }
1533}
1534
1535impl<T: PartialOrd> Default for Unsorted<T> {
1536    #[inline]
1537    fn default() -> Unsorted<T> {
1538        Unsorted {
1539            data: Vec::with_capacity(16),
1540            sorted: true, // empty is sorted
1541        }
1542    }
1543}
1544
1545impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
1546    #[inline]
1547    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1548        let mut v = Unsorted::new();
1549        v.extend(it);
1550        v
1551    }
1552}
1553
1554impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1555    #[inline]
1556    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1557        self.sorted = false;
1558        self.data.extend(it.into_iter().map(Partial));
1559    }
1560}
1561
1562fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1563where
1564    T: PartialOrd + Clone,
1565{
1566    let len = data.len();
1567
1568    // Early return for empty array or invalid percentiles
1569    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1570        return None;
1571    }
1572
1573    // Optimize: Check if percentiles are already sorted and unique
1574    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1575        // Single or empty percentile - no need to sort/dedup
1576        percentiles.to_vec()
1577    } else {
1578        // Check if already sorted and unique (common case)
1579        let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
1580
1581        if is_sorted_unique {
1582            // Already sorted and unique, use directly without cloning
1583            percentiles.to_vec()
1584        } else {
1585            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1586            let mut seen = [false; 101];
1587            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1588            for &p in percentiles {
1589                if !seen[p as usize] {
1590                    seen[p as usize] = true;
1591                    sorted_unique.push(p);
1592                }
1593            }
1594            sorted_unique.sort_unstable();
1595            sorted_unique
1596        }
1597    };
1598
1599    let mut results = Vec::with_capacity(unique_percentiles.len());
1600
1601    // SAFETY: All index calculations below are guaranteed to be in bounds
1602    // because we've verified len > 0 and the rank calculation ensures
1603    // the index is within bounds
1604    unsafe {
1605        for &p in &unique_percentiles {
1606            // Calculate the ordinal rank using nearest-rank method
1607            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1608            // n = ⌈(P/100) × N⌉
1609            #[allow(clippy::cast_sign_loss)]
1610            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1611
1612            // Convert to 0-based index
1613            let idx = rank.saturating_sub(1);
1614
1615            // Get the value at that rank and extract the inner value
1616            results.push(data.get_unchecked(idx).0.clone());
1617        }
1618    }
1619
1620    Some(results)
1621}
1622
1623impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1624    /// Returns the requested percentiles of the data.
1625    ///
1626    /// Uses the nearest-rank method to compute percentiles.
1627    /// Each returned value is an actual value from the dataset.
1628    ///
1629    /// # Arguments
1630    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1631    ///
1632    /// # Returns
1633    /// * `None` if the data is empty or if any percentile is > 100
1634    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1635    ///
1636    /// # Example
1637    /// ```
1638    /// use stats::Unsorted;
1639    /// let mut data = Unsorted::new();
1640    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1641    /// let percentiles = vec![25, 50, 75];
1642    /// let results = data.custom_percentiles(&percentiles).unwrap();
1643    /// assert_eq!(results, vec![3, 5, 8]);
1644    /// ```
1645    #[inline]
1646    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1647        if self.data.is_empty() {
1648            return None;
1649        }
1650        self.sort();
1651        custom_percentiles_on_sorted(&self.data, percentiles)
1652    }
1653}
1654
1655#[cfg(test)]
1656mod test {
1657    use super::*;
1658
1659    #[test]
1660    fn test_cardinality_empty() {
1661        let mut unsorted: Unsorted<i32> = Unsorted::new();
1662        assert_eq!(unsorted.cardinality(false, 1), 0);
1663    }
1664
1665    #[test]
1666    fn test_cardinality_single_element() {
1667        let mut unsorted = Unsorted::new();
1668        unsorted.add(5);
1669        assert_eq!(unsorted.cardinality(false, 1), 1);
1670    }
1671
1672    #[test]
1673    fn test_cardinality_unique_elements() {
1674        let mut unsorted = Unsorted::new();
1675        unsorted.extend(vec![1, 2, 3, 4, 5]);
1676        assert_eq!(unsorted.cardinality(false, 1), 5);
1677    }
1678
1679    #[test]
1680    fn test_cardinality_duplicate_elements() {
1681        let mut unsorted = Unsorted::new();
1682        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1683        assert_eq!(unsorted.cardinality(false, 1), 4);
1684    }
1685
1686    #[test]
1687    fn test_cardinality_all_same() {
1688        let mut unsorted = Unsorted::new();
1689        unsorted.extend(vec![1; 100]);
1690        assert_eq!(unsorted.cardinality(false, 1), 1);
1691    }
1692
1693    #[test]
1694    fn test_cardinality_large_range() {
1695        let mut unsorted = Unsorted::new();
1696        unsorted.extend(0..1_000_000);
1697        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1698    }
1699
1700    #[test]
1701    fn test_cardinality_large_range_sequential() {
1702        let mut unsorted = Unsorted::new();
1703        unsorted.extend(0..1_000_000);
1704        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1705    }
1706
1707    #[test]
1708    fn test_cardinality_presorted() {
1709        let mut unsorted = Unsorted::new();
1710        unsorted.extend(vec![1, 2, 3, 4, 5]);
1711        unsorted.sort();
1712        assert_eq!(unsorted.cardinality(true, 1), 5);
1713    }
1714
1715    #[test]
1716    fn test_cardinality_float() {
1717        let mut unsorted = Unsorted::new();
1718        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1719        assert_eq!(unsorted.cardinality(false, 1), 4);
1720    }
1721
1722    #[test]
1723    fn test_cardinality_string() {
1724        let mut unsorted = Unsorted::new();
1725        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1726        assert_eq!(unsorted.cardinality(false, 1), 3);
1727    }
1728
1729    #[test]
1730    fn test_quartiles_selection_vs_sorted() {
1731        // Test that selection-based quartiles gives same results as sorting-based
1732        let test_cases = vec![
1733            vec![3, 5, 7, 9],
1734            vec![3, 5, 7],
1735            vec![1, 2, 7, 11],
1736            vec![3, 5, 7, 9, 12],
1737            vec![2, 2, 3, 8, 10],
1738            vec![3, 5, 7, 9, 12, 20],
1739            vec![0, 2, 4, 8, 10, 11],
1740            vec![3, 5, 7, 9, 12, 20, 21],
1741            vec![1, 5, 6, 6, 7, 10, 19],
1742        ];
1743
1744        for test_case in test_cases {
1745            let mut unsorted1 = Unsorted::new();
1746            let mut unsorted2 = Unsorted::new();
1747            let mut unsorted3 = Unsorted::new();
1748            unsorted1.extend(test_case.clone());
1749            unsorted2.extend(test_case.clone());
1750            unsorted3.extend(test_case.clone());
1751
1752            let result_sorted = unsorted1.quartiles();
1753            let result_selection = unsorted2.quartiles_with_selection();
1754            let result_zero_copy = unsorted3.quartiles_zero_copy();
1755
1756            assert_eq!(
1757                result_sorted, result_selection,
1758                "Selection mismatch for test case: {:?}",
1759                test_case
1760            );
1761            assert_eq!(
1762                result_sorted, result_zero_copy,
1763                "Zero-copy mismatch for test case: {:?}",
1764                test_case
1765            );
1766        }
1767    }
1768
1769    #[test]
1770    fn test_quartiles_with_selection_small() {
1771        // Test edge cases for selection-based quartiles
1772        let mut unsorted: Unsorted<i32> = Unsorted::new();
1773        assert_eq!(unsorted.quartiles_with_selection(), None);
1774
1775        let mut unsorted = Unsorted::new();
1776        unsorted.extend(vec![1, 2]);
1777        assert_eq!(unsorted.quartiles_with_selection(), None);
1778
1779        let mut unsorted = Unsorted::new();
1780        unsorted.extend(vec![1, 2, 3]);
1781        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1782    }
1783
1784    #[test]
1785    fn test_quickselect() {
1786        let data = vec![
1787            Partial(3),
1788            Partial(1),
1789            Partial(4),
1790            Partial(1),
1791            Partial(5),
1792            Partial(9),
1793            Partial(2),
1794            Partial(6),
1795        ];
1796
1797        // Test finding different positions
1798        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1799        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1800        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1801
1802        // Test edge cases
1803        let mut empty: Vec<Partial<i32>> = vec![];
1804        assert_eq!(quickselect(&mut empty, 0), None);
1805
1806        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1807        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1808    }
1809
1810    #[test]
1811    fn median_stream() {
1812        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1813        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1814    }
1815
1816    #[test]
1817    fn mad_stream() {
1818        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1819        assert_eq!(
1820            mad(
1821                vec![
1822                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1823                    50, 61, 39, 88, 5, 13, 64
1824                ]
1825                .into_iter(),
1826                None
1827            ),
1828            Some(16.0)
1829        );
1830    }
1831
1832    #[test]
1833    fn mad_stream_precalc_median() {
1834        let data = vec![3usize, 5, 7, 9].into_iter();
1835        let median1 = median(data.clone());
1836        assert_eq!(mad(data, median1), Some(2.0));
1837
1838        let data2 = vec![
1839            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1840            39, 88, 5, 13, 64,
1841        ]
1842        .into_iter();
1843        let median2 = median(data2.clone());
1844        assert_eq!(mad(data2, median2), Some(16.0));
1845    }
1846
1847    #[test]
1848    fn mode_stream() {
1849        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1850        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1851        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1852        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1853        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1854    }
1855
1856    #[test]
1857    fn median_floats() {
1858        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1859        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1860    }
1861
1862    #[test]
1863    fn mode_floats() {
1864        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1865        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1866        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1867        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1868        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1869    }
1870
1871    #[test]
1872    fn modes_stream() {
1873        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1874        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1875        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1876        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1877        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1878        let vec: Vec<u32> = vec![];
1879        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1880    }
1881
1882    #[test]
1883    fn modes_floats() {
1884        assert_eq!(
1885            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1886            (vec![], 0, 0)
1887        );
1888        assert_eq!(
1889            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1890            (vec![3.0], 1, 4)
1891        );
1892        assert_eq!(
1893            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1894            (vec![3.0, 4.0], 2, 2)
1895        );
1896        assert_eq!(
1897            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1898            (vec![1.0, 3.0], 2, 2)
1899        );
1900    }
1901
1902    #[test]
1903    fn antimodes_stream() {
1904        assert_eq!(
1905            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1906            (vec![3, 5, 7, 9], 4, 1)
1907        );
1908        assert_eq!(
1909            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1910            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1911        );
1912        assert_eq!(
1913            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1914            (vec![1], 1, 1)
1915        );
1916        assert_eq!(
1917            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1918            (vec![3, 4], 2, 2)
1919        );
1920        assert_eq!(
1921            antimodes(
1922                vec![
1923                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1924                    14, 14, 15, 15
1925                ]
1926                .into_iter()
1927            ),
1928            // we only show the first 10 of the 13 antimodes
1929            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1930        );
1931        assert_eq!(
1932            antimodes(
1933                vec![
1934                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1935                    14, 14, 15, 15
1936                ]
1937                .into_iter()
1938            ),
1939            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1940        );
1941        assert_eq!(
1942            antimodes(vec![3usize, 3, 3, 4].into_iter()),
1943            (vec![4], 1, 1)
1944        );
1945        assert_eq!(
1946            antimodes(vec![4usize, 3, 3, 3].into_iter()),
1947            (vec![4], 1, 1)
1948        );
1949        assert_eq!(
1950            antimodes(vec![1usize, 1, 2, 2].into_iter()),
1951            (vec![1, 2], 2, 2)
1952        );
1953        let vec: Vec<u32> = vec![];
1954        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1955    }
1956
1957    #[test]
1958    fn antimodes_floats() {
1959        assert_eq!(
1960            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1961            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1962        );
1963        assert_eq!(
1964            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1965            (vec![], 0, 0)
1966        );
1967        assert_eq!(
1968            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1969            (vec![3.0, 4.0], 2, 2)
1970        );
1971        assert_eq!(
1972            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1973            (vec![2.0], 1, 1)
1974        );
1975    }
1976
1977    #[test]
1978    fn test_custom_percentiles() {
1979        // Test with integers
1980        let mut unsorted: Unsorted<i32> = Unsorted::new();
1981        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
1982
1983        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
1984        assert_eq!(result, vec![3, 6, 9]);
1985
1986        // Test with strings
1987        let mut str_data = Unsorted::new();
1988        str_data.extend(vec!["a", "b", "c", "d", "e"]);
1989        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
1990        assert_eq!(result, vec!["a", "b", "c", "d"]);
1991
1992        // Test with chars
1993        let mut char_data = Unsorted::new();
1994        char_data.extend('a'..='e');
1995        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
1996        assert_eq!(result, vec!['b', 'c', 'd']);
1997
1998        // Test with floats
1999        let mut float_data = Unsorted::new();
2000        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2001        let result = float_data
2002            .custom_percentiles(&[10, 30, 50, 70, 90])
2003            .unwrap();
2004        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2005
2006        // Test with empty percentiles array
2007        let result = float_data.custom_percentiles(&[]).unwrap();
2008        assert_eq!(result, Vec::<f64>::new());
2009
2010        // Test with duplicate percentiles
2011        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2012        assert_eq!(result, vec![5.5]);
2013
2014        // Test with extreme percentiles
2015        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2016        assert_eq!(result, vec![1.1, 9.9]);
2017
2018        // Test with unsorted percentiles
2019        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2020        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2021
2022        // Test with single element
2023        let mut single = Unsorted::new();
2024        single.add(42);
2025        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2026        assert_eq!(result, vec![42, 42, 42]);
2027    }
2028
2029    #[test]
2030    fn quartiles_stream() {
2031        assert_eq!(
2032            quartiles(vec![3usize, 5, 7].into_iter()),
2033            Some((3., 5., 7.))
2034        );
2035        assert_eq!(
2036            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2037            Some((4., 6., 8.))
2038        );
2039        assert_eq!(
2040            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2041            Some((1.5, 4.5, 9.))
2042        );
2043        assert_eq!(
2044            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2045            Some((4., 7., 10.5))
2046        );
2047        assert_eq!(
2048            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2049            Some((2., 3., 9.))
2050        );
2051        assert_eq!(
2052            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2053            Some((5., 8., 12.))
2054        );
2055        assert_eq!(
2056            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2057            Some((2., 6., 10.))
2058        );
2059        assert_eq!(
2060            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2061            Some((5., 9., 20.))
2062        );
2063        assert_eq!(
2064            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2065            Some((5., 6., 10.))
2066        );
2067    }
2068
2069    #[test]
2070    fn quartiles_floats() {
2071        assert_eq!(
2072            quartiles(vec![3_f64, 5., 7.].into_iter()),
2073            Some((3., 5., 7.))
2074        );
2075        assert_eq!(
2076            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2077            Some((4., 6., 8.))
2078        );
2079        assert_eq!(
2080            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2081            Some((4., 7., 10.5))
2082        );
2083        assert_eq!(
2084            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2085            Some((5., 8., 12.))
2086        );
2087        assert_eq!(
2088            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2089            Some((5., 9., 20.))
2090        );
2091    }
2092
2093    #[test]
2094    fn test_quartiles_zero_copy_small() {
2095        // Test edge cases for zero-copy quartiles
2096        let unsorted: Unsorted<i32> = Unsorted::new();
2097        assert_eq!(unsorted.quartiles_zero_copy(), None);
2098
2099        let mut unsorted = Unsorted::new();
2100        unsorted.extend(vec![1, 2]);
2101        assert_eq!(unsorted.quartiles_zero_copy(), None);
2102
2103        let mut unsorted = Unsorted::new();
2104        unsorted.extend(vec![1, 2, 3]);
2105        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2106
2107        // Test larger case
2108        let mut unsorted = Unsorted::new();
2109        unsorted.extend(vec![3, 5, 7, 9]);
2110        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2111    }
2112
2113    #[test]
2114    fn gini_empty() {
2115        let mut unsorted: Unsorted<i32> = Unsorted::new();
2116        assert_eq!(unsorted.gini(None), None);
2117        let empty_vec: Vec<i32> = vec![];
2118        assert_eq!(gini(empty_vec.into_iter(), None), None);
2119    }
2120
2121    #[test]
2122    fn gini_single_element() {
2123        let mut unsorted = Unsorted::new();
2124        unsorted.add(5);
2125        assert_eq!(unsorted.gini(None), Some(0.0));
2126        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2127    }
2128
2129    #[test]
2130    fn gini_perfect_equality() {
2131        // All values are the same - perfect equality, Gini = 0
2132        let mut unsorted = Unsorted::new();
2133        unsorted.extend(vec![10, 10, 10, 10, 10]);
2134        let result = unsorted.gini(None).unwrap();
2135        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2136
2137        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2138    }
2139
2140    #[test]
2141    fn gini_perfect_inequality() {
2142        // One value has everything, others have zero - perfect inequality
2143        // For [0, 0, 0, 0, 100], Gini should be close to 1
2144        let mut unsorted = Unsorted::new();
2145        unsorted.extend(vec![0, 0, 0, 0, 100]);
2146        let result = unsorted.gini(None).unwrap();
2147        // Perfect inequality should give Gini close to 1
2148        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2149        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2150    }
2151
2152    #[test]
2153    fn gini_stream() {
2154        // Test with known values
2155        // For [1, 2, 3, 4, 5]:
2156        // sum = 15
2157        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2158        // n = 5
2159        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2160        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2161        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2162        assert!(
2163            (result - expected).abs() < 1e-10,
2164            "Expected {}, got {}",
2165            expected,
2166            result
2167        );
2168    }
2169
2170    #[test]
2171    fn gini_floats() {
2172        let mut unsorted = Unsorted::new();
2173        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2174        let result = unsorted.gini(None).unwrap();
2175        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2176        assert!((result - expected).abs() < 1e-10);
2177
2178        assert!(
2179            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2180                < 1e-10
2181        );
2182    }
2183
2184    #[test]
2185    fn gini_all_zeros() {
2186        // All zeros - sum is zero, Gini is undefined
2187        let mut unsorted = Unsorted::new();
2188        unsorted.extend(vec![0, 0, 0, 0]);
2189        assert_eq!(unsorted.gini(None), None);
2190        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2191    }
2192
2193    #[test]
2194    fn gini_negative_values() {
2195        // Test with negative values (mathematically valid)
2196        let mut unsorted = Unsorted::new();
2197        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2198        let result = unsorted.gini(None);
2199        // Sum is 0, so Gini is undefined
2200        assert_eq!(result, None);
2201
2202        // Test with negative values that don't sum to zero
2203        let mut unsorted = Unsorted::new();
2204        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2205        let result = unsorted.gini(None);
2206        // Sum is 0, so Gini is undefined
2207        assert_eq!(result, None);
2208
2209        // Test with values containing negatives that sum to non-zero
2210        // Gini is undefined for negative values, should return None
2211        let mut unsorted = Unsorted::new();
2212        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2213        let result = unsorted.gini(None);
2214        assert_eq!(result, None);
2215    }
2216
2217    #[test]
2218    fn gini_known_cases() {
2219        // Test case: [1, 1, 1, 1, 1] - perfect equality
2220        let mut unsorted = Unsorted::new();
2221        unsorted.extend(vec![1, 1, 1, 1, 1]);
2222        let result = unsorted.gini(None).unwrap();
2223        assert!((result - 0.0).abs() < 1e-10);
2224
2225        // Test case: [0, 0, 0, 0, 1] - high inequality
2226        let mut unsorted = Unsorted::new();
2227        unsorted.extend(vec![0, 0, 0, 0, 1]);
2228        let result = unsorted.gini(None).unwrap();
2229        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2230        assert!((result - 0.8).abs() < 1e-10);
2231
2232        // Test case: [1, 2, 3] - moderate inequality
2233        let mut unsorted = Unsorted::new();
2234        unsorted.extend(vec![1, 2, 3]);
2235        let result = unsorted.gini(None).unwrap();
2236        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2237        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2238        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2239        assert!((result - expected).abs() < 1e-10);
2240    }
2241
2242    #[test]
2243    fn gini_precalc_sum() {
2244        // Test with pre-calculated sum
2245        let mut unsorted = Unsorted::new();
2246        unsorted.extend(vec![1, 2, 3, 4, 5]);
2247        let precalc_sum = Some(15.0);
2248        let result = unsorted.gini(precalc_sum).unwrap();
2249        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2250        assert!((result - expected).abs() < 1e-10);
2251
2252        // Test that pre-calculated sum gives same result
2253        let mut unsorted2 = Unsorted::new();
2254        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2255        let result2 = unsorted2.gini(None).unwrap();
2256        assert!((result - result2).abs() < 1e-10);
2257    }
2258
2259    #[test]
2260    fn gini_large_dataset() {
2261        // Test with larger dataset to exercise parallel path
2262        let data: Vec<i32> = (1..=1000).collect();
2263        let result = gini(data.iter().copied(), None);
2264        assert!(result.is_some());
2265        let gini_val = result.unwrap();
2266        // For uniform distribution, Gini should be positive but not too high
2267        assert!(gini_val > 0.0 && gini_val < 0.5);
2268    }
2269
2270    #[test]
2271    fn gini_unsorted_vs_sorted() {
2272        // Test that sorting doesn't affect result
2273        let mut unsorted1 = Unsorted::new();
2274        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2275        let result1 = unsorted1.gini(None).unwrap();
2276
2277        let mut unsorted2 = Unsorted::new();
2278        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2279        let result2 = unsorted2.gini(None).unwrap();
2280
2281        assert!((result1 - result2).abs() < 1e-10);
2282    }
2283
2284    #[test]
2285    fn gini_small_values() {
2286        // Test with very small values
2287        let mut unsorted = Unsorted::new();
2288        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2289        let result = unsorted.gini(None);
2290        assert!(result.is_some());
2291        // Should be same as [1, 2, 3, 4, 5] scaled down
2292        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2293        assert!((result.unwrap() - expected).abs() < 1e-10);
2294    }
2295
2296    #[test]
2297    fn gini_large_values() {
2298        // Test with large values
2299        let mut unsorted = Unsorted::new();
2300        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2301        let result = unsorted.gini(None);
2302        assert!(result.is_some());
2303        // Should be same as [1, 2, 3, 4, 5] scaled up
2304        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2305        assert!((result.unwrap() - expected).abs() < 1e-10);
2306    }
2307
2308    #[test]
2309    fn gini_two_elements() {
2310        // Test with exactly 2 elements
2311        let mut unsorted = Unsorted::new();
2312        unsorted.extend(vec![1, 2]);
2313        let result = unsorted.gini(None).unwrap();
2314        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2315        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2316        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2317        assert!((result - expected).abs() < 1e-10);
2318    }
2319
2320    #[test]
2321    fn gini_precalc_sum_zero() {
2322        // Test with pre-calculated sum of zero (should return None)
2323        let mut unsorted = Unsorted::new();
2324        unsorted.extend(vec![1, 2, 3, 4, 5]);
2325        let result = unsorted.gini(Some(0.0));
2326        assert_eq!(result, None);
2327    }
2328
2329    #[test]
2330    fn gini_precalc_sum_negative() {
2331        // Gini is undefined for negative values, should return None
2332        let mut unsorted = Unsorted::new();
2333        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2334        let result = unsorted.gini(None);
2335        assert_eq!(result, None);
2336
2337        // Negative precalculated sum should also return None
2338        let mut unsorted = Unsorted::new();
2339        unsorted.extend(vec![1, 2, 3]);
2340        let result = unsorted.gini(Some(-5.0));
2341        assert_eq!(result, None);
2342    }
2343
2344    #[test]
2345    fn gini_different_types() {
2346        // Test with different integer types
2347        let mut unsorted_u32 = Unsorted::new();
2348        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2349        let result_u32 = unsorted_u32.gini(None).unwrap();
2350
2351        let mut unsorted_i64 = Unsorted::new();
2352        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2353        let result_i64 = unsorted_i64.gini(None).unwrap();
2354
2355        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2356        assert!((result_u32 - expected).abs() < 1e-10);
2357        assert!((result_i64 - expected).abs() < 1e-10);
2358    }
2359
2360    #[test]
2361    fn gini_extreme_inequality() {
2362        // Test with extreme inequality: one very large value, many zeros
2363        let mut unsorted = Unsorted::new();
2364        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2365        let result = unsorted.gini(None).unwrap();
2366        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2367        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2368        assert!((result - 0.9).abs() < 1e-10);
2369    }
2370
2371    #[test]
2372    fn gini_duplicate_values() {
2373        // Test with many duplicate values
2374        let mut unsorted = Unsorted::new();
2375        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2376        let result = unsorted.gini(None);
2377        assert!(result.is_some());
2378        // Should be between 0 and 1
2379        let gini_val = result.unwrap();
2380        assert!((0.0..=1.0).contains(&gini_val));
2381    }
2382
2383    #[test]
2384    fn kurtosis_empty() {
2385        let mut unsorted: Unsorted<i32> = Unsorted::new();
2386        assert_eq!(unsorted.kurtosis(None, None), None);
2387        let empty_vec: Vec<i32> = vec![];
2388        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2389    }
2390
2391    #[test]
2392    fn kurtosis_small() {
2393        // Need at least 4 elements
2394        let mut unsorted = Unsorted::new();
2395        unsorted.extend(vec![1, 2]);
2396        assert_eq!(unsorted.kurtosis(None, None), None);
2397
2398        let mut unsorted = Unsorted::new();
2399        unsorted.extend(vec![1, 2, 3]);
2400        assert_eq!(unsorted.kurtosis(None, None), None);
2401    }
2402
2403    #[test]
2404    fn kurtosis_normal_distribution() {
2405        // Normal distribution should have kurtosis close to 0
2406        let mut unsorted = Unsorted::new();
2407        unsorted.extend(vec![1, 2, 3, 4, 5]);
2408        let result = unsorted.kurtosis(None, None);
2409        assert!(result.is_some());
2410        // For small samples, kurtosis can vary significantly
2411    }
2412
2413    #[test]
2414    fn kurtosis_all_same() {
2415        // All same values - variance is 0, kurtosis undefined
2416        let mut unsorted = Unsorted::new();
2417        unsorted.extend(vec![5, 5, 5, 5]);
2418        assert_eq!(unsorted.kurtosis(None, None), None);
2419    }
2420
2421    #[test]
2422    fn kurtosis_stream() {
2423        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2424        assert!(result.is_some());
2425    }
2426
2427    #[test]
2428    fn kurtosis_precalc_mean_variance() {
2429        // Test with pre-calculated mean and variance
2430        let mut unsorted = Unsorted::new();
2431        unsorted.extend(vec![1, 2, 3, 4, 5]);
2432
2433        // Calculate mean and variance manually
2434        let mean = 3.0f64;
2435        let variance = ((1.0f64 - 3.0).powi(2)
2436            + (2.0f64 - 3.0).powi(2)
2437            + (3.0f64 - 3.0).powi(2)
2438            + (4.0f64 - 3.0).powi(2)
2439            + (5.0f64 - 3.0).powi(2))
2440            / 5.0;
2441
2442        let result = unsorted.kurtosis(Some(mean), Some(variance));
2443        assert!(result.is_some());
2444
2445        // Test that pre-calculated values give same result
2446        let mut unsorted2 = Unsorted::new();
2447        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2448        let result2 = unsorted2.kurtosis(None, None);
2449        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2450    }
2451
2452    #[test]
2453    fn kurtosis_precalc_mean_only() {
2454        // Test with pre-calculated mean only
2455        let mut unsorted = Unsorted::new();
2456        unsorted.extend(vec![1, 2, 3, 4, 5]);
2457        let mean = 3.0f64;
2458
2459        let result = unsorted.kurtosis(Some(mean), None);
2460        assert!(result.is_some());
2461
2462        // Test that pre-calculated mean gives same result
2463        let mut unsorted2 = Unsorted::new();
2464        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2465        let result2 = unsorted2.kurtosis(None, None);
2466        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2467    }
2468
2469    #[test]
2470    fn kurtosis_precalc_variance_only() {
2471        // Test with pre-calculated variance only
2472        let mut unsorted = Unsorted::new();
2473        unsorted.extend(vec![1, 2, 3, 4, 5]);
2474        let variance = ((1.0f64 - 3.0).powi(2)
2475            + (2.0f64 - 3.0).powi(2)
2476            + (3.0f64 - 3.0).powi(2)
2477            + (4.0f64 - 3.0).powi(2)
2478            + (5.0f64 - 3.0).powi(2))
2479            / 5.0;
2480
2481        let result = unsorted.kurtosis(None, Some(variance));
2482        assert!(result.is_some());
2483
2484        // Test that pre-calculated variance gives same result
2485        let mut unsorted2 = Unsorted::new();
2486        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2487        let result2 = unsorted2.kurtosis(None, None);
2488        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2489    }
2490
2491    #[test]
2492    fn kurtosis_exact_calculation() {
2493        // Test with exact calculation for [1, 2, 3, 4]
2494        // Mean = 2.5
2495        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2496        // Variance^2 = 1.5625
2497        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2498        // n = 4
2499        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2500        let mut unsorted = Unsorted::new();
2501        unsorted.extend(vec![1, 2, 3, 4]);
2502        let result = unsorted.kurtosis(None, None).unwrap();
2503        // For small samples, kurtosis can be very high
2504        assert!(result.is_finite());
2505    }
2506
2507    #[test]
2508    fn kurtosis_uniform_distribution() {
2509        // Uniform distribution should have negative excess kurtosis
2510        let mut unsorted = Unsorted::new();
2511        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2512        let result = unsorted.kurtosis(None, None).unwrap();
2513        // Uniform distribution has excess kurtosis = -1.2
2514        // But for small samples, it can vary significantly
2515        assert!(result.is_finite());
2516    }
2517
2518    #[test]
2519    fn kurtosis_large_dataset() {
2520        // Test with larger dataset to exercise parallel path
2521        let data: Vec<i32> = (1..=1000).collect();
2522        let result = kurtosis(data.iter().copied(), None, None);
2523        assert!(result.is_some());
2524        let kurt_val = result.unwrap();
2525        assert!(kurt_val.is_finite());
2526    }
2527
2528    #[test]
2529    fn kurtosis_unsorted_vs_sorted() {
2530        // Test that sorting doesn't affect result
2531        let mut unsorted1 = Unsorted::new();
2532        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2533        let result1 = unsorted1.kurtosis(None, None).unwrap();
2534
2535        let mut unsorted2 = Unsorted::new();
2536        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2537        let result2 = unsorted2.kurtosis(None, None).unwrap();
2538
2539        assert!((result1 - result2).abs() < 1e-10);
2540    }
2541
2542    #[test]
2543    fn kurtosis_minimum_size() {
2544        // Test with exactly 4 elements (minimum required)
2545        let mut unsorted = Unsorted::new();
2546        unsorted.extend(vec![1, 2, 3, 4]);
2547        let result = unsorted.kurtosis(None, None);
2548        assert!(result.is_some());
2549        assert!(result.unwrap().is_finite());
2550    }
2551
2552    #[test]
2553    fn kurtosis_heavy_tailed() {
2554        // Test with heavy-tailed distribution (outliers)
2555        let mut unsorted = Unsorted::new();
2556        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2557        let result = unsorted.kurtosis(None, None).unwrap();
2558        // Heavy tails should give positive excess kurtosis
2559        assert!(result.is_finite());
2560        // With an outlier, kurtosis should be positive
2561        assert!(result > -10.0); // Allow some variance but should be reasonable
2562    }
2563
2564    #[test]
2565    fn kurtosis_light_tailed() {
2566        // Test with light-tailed distribution (values close together)
2567        let mut unsorted = Unsorted::new();
2568        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2569        let result = unsorted.kurtosis(None, None).unwrap();
2570        // Light tails might give negative excess kurtosis
2571        assert!(result.is_finite());
2572    }
2573
2574    #[test]
2575    fn kurtosis_small_variance() {
2576        // Test with very small variance (values very close together)
2577        let mut unsorted = Unsorted::new();
2578        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2579        let result = unsorted.kurtosis(None, None);
2580        // Should still compute (variance is very small but non-zero)
2581        assert!(result.is_some());
2582        assert!(result.unwrap().is_finite());
2583    }
2584
2585    #[test]
2586    fn kurtosis_precalc_zero_variance() {
2587        // Test with pre-calculated variance of zero (should return None)
2588        let mut unsorted = Unsorted::new();
2589        unsorted.extend(vec![1, 2, 3, 4, 5]);
2590        let result = unsorted.kurtosis(None, Some(0.0));
2591        assert_eq!(result, None);
2592    }
2593
2594    #[test]
2595    fn kurtosis_precalc_negative_variance() {
2596        // Test with negative variance (invalid, but should handle gracefully)
2597        let mut unsorted = Unsorted::new();
2598        unsorted.extend(vec![1, 2, 3, 4, 5]);
2599        // Negative variance is invalid, but function should handle it
2600        let result = unsorted.kurtosis(None, Some(-1.0));
2601        // Should either return None or handle it gracefully
2602        // The function computes variance_sq = variance^2, so negative becomes positive
2603        // But this is invalid input, so behavior may vary
2604        // For now, just check it doesn't panic
2605        let _ = result;
2606    }
2607
2608    #[test]
2609    fn kurtosis_different_types() {
2610        // Test with different integer types
2611        let mut unsorted_u32 = Unsorted::new();
2612        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2613        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2614
2615        let mut unsorted_i64 = Unsorted::new();
2616        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2617        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2618
2619        assert!((result_u32 - result_i64).abs() < 1e-10);
2620    }
2621
2622    #[test]
2623    fn kurtosis_floating_point_precision() {
2624        // Test floating point precision
2625        let mut unsorted = Unsorted::new();
2626        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2627        let result = unsorted.kurtosis(None, None);
2628        assert!(result.is_some());
2629        assert!(result.unwrap().is_finite());
2630    }
2631
2632    #[test]
2633    fn kurtosis_negative_values() {
2634        // Test with negative values
2635        let mut unsorted = Unsorted::new();
2636        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2637        let result = unsorted.kurtosis(None, None);
2638        assert!(result.is_some());
2639        assert!(result.unwrap().is_finite());
2640    }
2641
2642    #[test]
2643    fn kurtosis_mixed_positive_negative() {
2644        // Test with mixed positive and negative values
2645        let mut unsorted = Unsorted::new();
2646        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2647        let result = unsorted.kurtosis(None, None);
2648        assert!(result.is_some());
2649        assert!(result.unwrap().is_finite());
2650    }
2651
2652    #[test]
2653    fn kurtosis_duplicate_values() {
2654        // Test with duplicate values (but not all same)
2655        let mut unsorted = Unsorted::new();
2656        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2657        let result = unsorted.kurtosis(None, None);
2658        assert!(result.is_some());
2659        assert!(result.unwrap().is_finite());
2660    }
2661
2662    #[test]
2663    fn kurtosis_precalc_mean_wrong() {
2664        // Test that wrong pre-calculated mean gives wrong result
2665        let mut unsorted1 = Unsorted::new();
2666        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2667        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2668
2669        let mut unsorted2 = Unsorted::new();
2670        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2671        let wrong_mean = 10.0; // Wrong mean
2672        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2673
2674        // Results should be different
2675        assert!((correct_result - wrong_result).abs() > 1e-5);
2676    }
2677
2678    #[test]
2679    fn percentile_rank_empty() {
2680        let mut unsorted: Unsorted<i32> = Unsorted::new();
2681        assert_eq!(unsorted.percentile_rank(5), None);
2682        let empty_vec: Vec<i32> = vec![];
2683        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2684    }
2685
2686    #[test]
2687    fn percentile_rank_basic() {
2688        let mut unsorted = Unsorted::new();
2689        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2690
2691        // Value less than all
2692        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2693
2694        // Value greater than all
2695        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2696
2697        // Median (5) should be around 50th percentile
2698        let rank = unsorted.percentile_rank(5).unwrap();
2699        assert!((rank - 50.0).abs() < 1.0);
2700
2701        // First value should be at 10th percentile
2702        let rank = unsorted.percentile_rank(1).unwrap();
2703        assert!((rank - 10.0).abs() < 1.0);
2704    }
2705
2706    #[test]
2707    fn percentile_rank_duplicates() {
2708        let mut unsorted = Unsorted::new();
2709        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2710
2711        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2712        let rank = unsorted.percentile_rank(2).unwrap();
2713        assert!((rank - 40.0).abs() < 1.0);
2714    }
2715
2716    #[test]
2717    fn percentile_rank_stream() {
2718        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2719        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2720    }
2721
2722    #[test]
2723    fn percentile_rank_many_ties() {
2724        // 100 copies of 5 followed by 100 copies of 10 — tests O(log n) upper bound
2725        let mut unsorted = Unsorted::new();
2726        for _ in 0..100 {
2727            unsorted.add(5u32);
2728        }
2729        for _ in 0..100 {
2730            unsorted.add(10u32);
2731        }
2732        // 100 values <= 5 out of 200
2733        let rank = unsorted.percentile_rank(5).unwrap();
2734        assert!((rank - 50.0).abs() < f64::EPSILON);
2735        // All 200 values <= 10
2736        let mut unsorted2 = Unsorted::new();
2737        for _ in 0..100 {
2738            unsorted2.add(5u32);
2739        }
2740        for _ in 0..100 {
2741            unsorted2.add(10u32);
2742        }
2743        let rank = unsorted2.percentile_rank(10).unwrap();
2744        assert!((rank - 100.0).abs() < f64::EPSILON);
2745    }
2746
2747    #[test]
2748    fn atkinson_empty() {
2749        let mut unsorted: Unsorted<i32> = Unsorted::new();
2750        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2751        let empty_vec: Vec<i32> = vec![];
2752        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2753    }
2754
2755    #[test]
2756    fn atkinson_single_element() {
2757        let mut unsorted = Unsorted::new();
2758        unsorted.add(5);
2759        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2760        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2761    }
2762
2763    #[test]
2764    fn atkinson_perfect_equality() {
2765        // All values the same - perfect equality, Atkinson = 0
2766        let mut unsorted = Unsorted::new();
2767        unsorted.extend(vec![10, 10, 10, 10, 10]);
2768        let result = unsorted.atkinson(1.0, None, None).unwrap();
2769        assert!((result - 0.0).abs() < 1e-10);
2770    }
2771
2772    #[test]
2773    fn atkinson_epsilon_zero() {
2774        // Epsilon = 0 means no inequality aversion, should return 0
2775        let mut unsorted = Unsorted::new();
2776        unsorted.extend(vec![1, 2, 3, 4, 5]);
2777        let result = unsorted.atkinson(0.0, None, None).unwrap();
2778        assert!((result - 0.0).abs() < 1e-10);
2779    }
2780
2781    #[test]
2782    fn atkinson_epsilon_one() {
2783        // Epsilon = 1 uses geometric mean
2784        let mut unsorted = Unsorted::new();
2785        unsorted.extend(vec![1, 2, 3, 4, 5]);
2786        let result = unsorted.atkinson(1.0, None, None);
2787        assert!(result.is_some());
2788    }
2789
2790    #[test]
2791    fn atkinson_negative_epsilon() {
2792        let mut unsorted = Unsorted::new();
2793        unsorted.extend(vec![1, 2, 3, 4, 5]);
2794        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2795    }
2796
2797    #[test]
2798    fn atkinson_zero_mean() {
2799        // If mean is zero, Atkinson is undefined
2800        let mut unsorted = Unsorted::new();
2801        unsorted.extend(vec![0, 0, 0, 0]);
2802        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2803    }
2804
2805    #[test]
2806    fn atkinson_stream() {
2807        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2808        assert!(result.is_some());
2809    }
2810
2811    #[test]
2812    fn atkinson_precalc_mean_geometric_sum() {
2813        // Test with pre-calculated mean and geometric_sum
2814        let mut unsorted = Unsorted::new();
2815        unsorted.extend(vec![1, 2, 3, 4, 5]);
2816
2817        // Calculate mean and geometric_sum manually
2818        let mean = 3.0f64;
2819        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2820
2821        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2822        assert!(result.is_some());
2823
2824        // Test that pre-calculated values give same result
2825        let mut unsorted2 = Unsorted::new();
2826        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2827        let result2 = unsorted2.atkinson(1.0, None, None);
2828        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2829    }
2830
2831    #[test]
2832    fn atkinson_precalc_mean_only() {
2833        // Test with pre-calculated mean only
2834        let mut unsorted = Unsorted::new();
2835        unsorted.extend(vec![1, 2, 3, 4, 5]);
2836        let mean = 3.0f64;
2837
2838        let result = unsorted.atkinson(1.0, Some(mean), None);
2839        assert!(result.is_some());
2840
2841        // Test that pre-calculated mean gives same result
2842        let mut unsorted2 = Unsorted::new();
2843        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2844        let result2 = unsorted2.atkinson(1.0, None, None);
2845        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2846    }
2847
2848    #[test]
2849    fn atkinson_precalc_geometric_sum_only() {
2850        // Test with pre-calculated geometric_sum only
2851        let mut unsorted = Unsorted::new();
2852        unsorted.extend(vec![1, 2, 3, 4, 5]);
2853        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2854
2855        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2856        assert!(result.is_some());
2857
2858        // Test that pre-calculated geometric_sum gives same result
2859        let mut unsorted2 = Unsorted::new();
2860        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2861        let result2 = unsorted2.atkinson(1.0, None, None);
2862        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2863    }
2864
2865    #[test]
2866    fn test_median_with_infinity() {
2867        let mut unsorted = Unsorted::new();
2868        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2869        assert_eq!(unsorted.median(), Some(2.0));
2870    }
2871
2872    #[test]
2873    fn test_median_with_neg_infinity() {
2874        let mut unsorted = Unsorted::new();
2875        unsorted.extend(vec![f64::NEG_INFINITY, 1.0f64, 2.0]);
2876        assert_eq!(unsorted.median(), Some(1.0));
2877    }
2878
2879    #[test]
2880    fn test_quartiles_with_infinity() {
2881        let mut unsorted = Unsorted::new();
2882        unsorted.extend(vec![f64::NEG_INFINITY, 1.0, 2.0, 3.0, f64::INFINITY]);
2883        let q = unsorted.quartiles();
2884        // Q2 (median) should be 2.0
2885        assert!(q.is_some());
2886        let (_, q2, _) = q.unwrap();
2887        assert_eq!(q2, 2.0);
2888    }
2889
2890    #[test]
2891    fn test_mode_with_nan() {
2892        // NaN breaks the Ord contract via Partial<T>, so sort order is
2893        // non-deterministic. We only verify the call doesn't panic —
2894        // the exact mode value depends on where NaN lands after sorting.
2895        let mut unsorted: Unsorted<f64> = Unsorted::new();
2896        unsorted.extend(vec![1.0, f64::NAN, 2.0, 2.0, 3.0]);
2897        let _result = unsorted.mode(); // must not panic
2898    }
2899
2900    #[test]
2901    fn test_gini_with_infinity() {
2902        let mut unsorted = Unsorted::new();
2903        unsorted.extend(vec![1.0f64, 2.0, f64::INFINITY]);
2904        let g = unsorted.gini(None);
2905        // Gini with infinity in the data: the weighted_sum/sum ratio involves
2906        // Inf/Inf which is NaN, so the result is Some(NaN) — not a meaningful
2907        // Gini coefficient, but importantly does not panic
2908        assert!(g.unwrap().is_nan());
2909    }
2910
2911    #[test]
2912    fn test_cardinality_with_infinity() {
2913        let mut unsorted = Unsorted::new();
2914        unsorted.extend(vec![1.0f64, f64::INFINITY, f64::NEG_INFINITY, 1.0]);
2915        assert_eq!(unsorted.cardinality(false, 10_000), 3);
2916    }
2917}
2918
2919#[cfg(test)]
2920mod bench {
2921    use super::*;
2922    use std::time::Instant;
2923
2924    #[test]
2925    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
2926    fn comprehensive_quartiles_benchmark() {
2927        // Test a wide range of data sizes
2928        let data_sizes = vec![
2929            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2930        ];
2931
2932        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2933
2934        for size in data_sizes {
2935            println!("--- Testing with {} elements ---", size);
2936
2937            // Test different data patterns
2938            let test_patterns = vec![
2939                ("Random", generate_random_data(size)),
2940                ("Reverse Sorted", {
2941                    let mut v = Vec::with_capacity(size);
2942                    for x in (0..size).rev() {
2943                        v.push(x as i32);
2944                    }
2945                    v
2946                }),
2947                ("Already Sorted", {
2948                    let mut v = Vec::with_capacity(size);
2949                    for x in 0..size {
2950                        v.push(x as i32);
2951                    }
2952                    v
2953                }),
2954                ("Many Duplicates", {
2955                    // Create a vector with just a few distinct values repeated many times
2956                    let mut v = Vec::with_capacity(size);
2957                    let chunk_size = size / 100;
2958                    for i in 0..100 {
2959                        v.extend(std::iter::repeat_n(i, chunk_size));
2960                    }
2961                    // Add any remaining elements
2962                    v.extend(std::iter::repeat_n(0, size - v.len()));
2963                    v
2964                }),
2965            ];
2966
2967            for (pattern_name, test_data) in test_patterns {
2968                println!("\n  Pattern: {}", pattern_name);
2969
2970                // Benchmark sorting-based approach
2971                let mut unsorted1 = Unsorted::new();
2972                unsorted1.extend(test_data.clone());
2973
2974                let start = Instant::now();
2975                let result_sorted = unsorted1.quartiles();
2976                let sorted_time = start.elapsed();
2977
2978                // Benchmark selection-based approach (with copying)
2979                let mut unsorted2 = Unsorted::new();
2980                unsorted2.extend(test_data.clone());
2981
2982                let start = Instant::now();
2983                let result_selection = unsorted2.quartiles_with_selection();
2984                let selection_time = start.elapsed();
2985
2986                // Benchmark zero-copy selection-based approach
2987                let mut unsorted3 = Unsorted::new();
2988                unsorted3.extend(test_data);
2989
2990                let start = Instant::now();
2991                let result_zero_copy = unsorted3.quartiles_zero_copy();
2992                let zero_copy_time = start.elapsed();
2993
2994                // Verify results are the same
2995                assert_eq!(result_sorted, result_selection);
2996                assert_eq!(result_sorted, result_zero_copy);
2997
2998                let selection_speedup =
2999                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3000                let zero_copy_speedup =
3001                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3002
3003                println!("    Sorting:       {:>12?}", sorted_time);
3004                println!(
3005                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3006                    selection_time, selection_speedup
3007                );
3008                println!(
3009                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3010                    zero_copy_time, zero_copy_speedup
3011                );
3012
3013                let best_algorithm =
3014                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3015                        "ZERO-COPY"
3016                    } else if selection_speedup > 1.0 {
3017                        "SELECTION"
3018                    } else {
3019                        "SORTING"
3020                    };
3021                println!("    Best: {}", best_algorithm);
3022            }
3023
3024            println!(); // Add blank line between sizes
3025        }
3026    }
3027
3028    // Generate random data for benchmarking
3029    fn generate_random_data(size: usize) -> Vec<i32> {
3030        // Simple LCG random number generator for reproducible results
3031        let mut rng = 1234567u64;
3032        let mut vec = Vec::with_capacity(size);
3033        for _ in 0..size {
3034            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3035            vec.push((rng >> 16) as i32);
3036        }
3037        vec
3038    }
3039
3040    #[test]
3041    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3042    fn find_selection_threshold() {
3043        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3044
3045        // Binary search approach to find the threshold
3046        let mut found_threshold = None;
3047        let test_sizes = vec![
3048            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3049            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3050        ];
3051
3052        for size in test_sizes {
3053            println!("Testing size: {}", size);
3054
3055            // Use random data as it's most representative of real-world scenarios
3056            let test_data = generate_random_data(size);
3057
3058            // Run multiple iterations to get average performance
3059            let iterations = 3;
3060            let mut sorting_total = 0u128;
3061            let mut selection_total = 0u128;
3062            let mut zero_copy_total = 0u128;
3063
3064            for i in 0..iterations {
3065                println!("  Iteration {}/{}", i + 1, iterations);
3066
3067                // Sorting approach
3068                let mut unsorted1 = Unsorted::new();
3069                unsorted1.extend(test_data.clone());
3070
3071                let start = Instant::now();
3072                let _result_sorted = unsorted1.quartiles();
3073                sorting_total += start.elapsed().as_nanos();
3074
3075                // Selection approach (with copying)
3076                let mut unsorted2 = Unsorted::new();
3077                unsorted2.extend(test_data.clone());
3078
3079                let start = Instant::now();
3080                let _result_selection = unsorted2.quartiles_with_selection();
3081                selection_total += start.elapsed().as_nanos();
3082
3083                // Zero-copy selection approach
3084                let mut unsorted3 = Unsorted::new();
3085                unsorted3.extend(test_data.clone());
3086
3087                let start = Instant::now();
3088                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3089                zero_copy_total += start.elapsed().as_nanos();
3090            }
3091
3092            let avg_sorting = sorting_total / iterations as u128;
3093            let avg_selection = selection_total / iterations as u128;
3094            let avg_zero_copy = zero_copy_total / iterations as u128;
3095            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3096            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3097
3098            println!(
3099                "  Average sorting:    {:>12.2}ms",
3100                avg_sorting as f64 / 1_000_000.0
3101            );
3102            println!(
3103                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3104                avg_selection as f64 / 1_000_000.0,
3105                selection_speedup
3106            );
3107            println!(
3108                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3109                avg_zero_copy as f64 / 1_000_000.0,
3110                zero_copy_speedup
3111            );
3112
3113            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3114                found_threshold = Some(size);
3115                let best_method = if zero_copy_speedup > selection_speedup {
3116                    "Zero-copy"
3117                } else {
3118                    "Selection"
3119                };
3120                println!(
3121                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3122                    best_method, size
3123                );
3124            }
3125
3126            println!();
3127        }
3128
3129        match found_threshold {
3130            Some(threshold) => println!(
3131                "🎯 Selection algorithm becomes faster at approximately {} elements",
3132                threshold
3133            ),
3134            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3135        }
3136    }
3137
3138    #[test]
3139    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3140    fn benchmark_different_data_types() {
3141        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3142
3143        let size = 5_000_000; // Use a large size where differences might be visible
3144
3145        // Test with f64 (floating point)
3146        println!("Testing with f64 data:");
3147        let float_data: Vec<f64> = generate_random_data(size)
3148            .into_iter()
3149            .map(|x| x as f64 / 1000.0)
3150            .collect();
3151
3152        let mut unsorted1 = Unsorted::new();
3153        unsorted1.extend(float_data.clone());
3154        let start = Instant::now();
3155        let _result = unsorted1.quartiles();
3156        let sorting_time = start.elapsed();
3157
3158        let mut unsorted2 = Unsorted::new();
3159        unsorted2.extend(float_data.clone());
3160        let start = Instant::now();
3161        let _result = unsorted2.quartiles_with_selection();
3162        let selection_time = start.elapsed();
3163
3164        let mut unsorted3 = Unsorted::new();
3165        unsorted3.extend(float_data);
3166        let start = Instant::now();
3167        let _result = unsorted3.quartiles_zero_copy();
3168        let zero_copy_time = start.elapsed();
3169
3170        println!("  Sorting:    {:?}", sorting_time);
3171        println!("  Selection:  {:?}", selection_time);
3172        println!("  Zero-copy:  {:?}", zero_copy_time);
3173        println!(
3174            "  Selection Speedup:  {:.2}x",
3175            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3176        );
3177        println!(
3178            "  Zero-copy Speedup:  {:.2}x\n",
3179            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3180        );
3181
3182        // Test with i64 (larger integers)
3183        println!("Testing with i64 data:");
3184        let int64_data: Vec<i64> = generate_random_data(size)
3185            .into_iter()
3186            .map(|x| x as i64 * 1000)
3187            .collect();
3188
3189        let mut unsorted1 = Unsorted::new();
3190        unsorted1.extend(int64_data.clone());
3191        let start = Instant::now();
3192        let _result = unsorted1.quartiles();
3193        let sorting_time = start.elapsed();
3194
3195        let mut unsorted2 = Unsorted::new();
3196        unsorted2.extend(int64_data.clone());
3197        let start = Instant::now();
3198        let _result = unsorted2.quartiles_with_selection();
3199        let selection_time = start.elapsed();
3200
3201        let mut unsorted3 = Unsorted::new();
3202        unsorted3.extend(int64_data);
3203        let start = Instant::now();
3204        let _result = unsorted3.quartiles_zero_copy();
3205        let zero_copy_time = start.elapsed();
3206
3207        println!("  Sorting:    {:?}", sorting_time);
3208        println!("  Selection:  {:?}", selection_time);
3209        println!("  Zero-copy:  {:?}", zero_copy_time);
3210        println!(
3211            "  Selection Speedup:  {:.2}x",
3212            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3213        );
3214        println!(
3215            "  Zero-copy Speedup:  {:.2}x",
3216            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3217        );
3218    }
3219}
stats/unsorted.rs

stats/
unsorted.rs