stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10// PARALLEL_THRESHOLD (10,000) is the minimum dataset size for rayon parallel sort.
11// The separate 10,240 threshold in cardinality estimation (5 × 2,048) is aligned to
12// cache-line-friendly chunk sizes for parallel iterator reduction.
13const PARALLEL_THRESHOLD: usize = 10_000;
14
15/// Compute the exact median on a stream of data.
16///
17/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
18#[inline]
19pub fn median<I>(it: I) -> Option<f64>
20where
21    I: Iterator,
22    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
23{
24    it.collect::<Unsorted<_>>().median()
25}
26
27/// Compute the median absolute deviation (MAD) on a stream of data.
28#[inline]
29pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
30where
31    I: Iterator,
32    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
33{
34    it.collect::<Unsorted<_>>().mad(precalc_median)
35}
36
37/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
38///
39/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
40#[inline]
41pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
42where
43    I: Iterator,
44    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send,
45{
46    it.collect::<Unsorted<_>>().quartiles()
47}
48
49/// Compute the exact mode on a stream of data.
50///
51/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
52///
53/// If the data does not have a mode, then `None` is returned.
54#[inline]
55pub fn mode<T, I>(it: I) -> Option<T>
56where
57    T: PartialOrd + Clone + Send,
58    I: Iterator<Item = T>,
59{
60    it.collect::<Unsorted<T>>().mode()
61}
62
63/// Compute the modes on a stream of data.
64///
65/// If there is a single mode, then only that value is returned in the `Vec`
66/// however, if there are multiple values tied for occurring the most amount of times
67/// those values are returned.
68///
69/// ## Example
70/// ```
71/// use stats;
72///
73/// let vals = vec![1, 1, 2, 2, 3];
74///
75/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
76/// ```
77/// This has time complexity `O(n)`
78///
79/// If the data does not have a mode, then an empty `Vec` is returned.
80#[inline]
81pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
82where
83    T: PartialOrd + Clone + Send,
84    I: Iterator<Item = T>,
85{
86    it.collect::<Unsorted<T>>().modes()
87}
88
89/// Compute the antimodes on a stream of data.
90///
91/// Antimode is the least frequent non-zero score.
92///
93/// If there is a single antimode, then only that value is returned in the `Vec`
94/// however, if there are multiple values tied for occurring the least amount of times
95/// those values are returned.
96///
97/// Only the first 10 antimodes are returned to prevent returning the whole set
98/// when cardinality = number of records (i.e. all unique values).
99///
100/// ## Example
101/// ```
102/// use stats;
103///
104/// let vals = vec![1, 1, 2, 2, 3];
105///
106/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
107/// ```
108/// This has time complexity `O(n)`
109///
110/// If the data does not have an antimode, then an empty `Vec` is returned.
111#[inline]
112pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
113where
114    T: PartialOrd + Clone + Send,
115    I: Iterator<Item = T>,
116{
117    let (antimodes_result, antimodes_count, antimodes_occurrences) =
118        it.collect::<Unsorted<T>>().antimodes();
119    (antimodes_result, antimodes_count, antimodes_occurrences)
120}
121
122/// Compute the Gini Coefficient on a stream of data.
123///
124/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
125/// to 1 (perfect inequality).
126///
127/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
128#[inline]
129pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
130where
131    I: Iterator,
132    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
133{
134    it.collect::<Unsorted<_>>().gini(precalc_sum)
135}
136
137/// Compute the kurtosis (excess kurtosis) on a stream of data.
138///
139/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
140/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
141/// negative values indicate light tails.
142///
143/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
144#[inline]
145pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
146where
147    I: Iterator,
148    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
149{
150    it.collect::<Unsorted<_>>()
151        .kurtosis(precalc_mean, precalc_variance)
152}
153
154/// Compute the percentile rank of a value on a stream of data.
155///
156/// Returns the percentile rank (0-100) of the given value in the distribution.
157/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
158///
159/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
160#[inline]
161pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
162where
163    I: Iterator,
164    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
165    V: PartialOrd + ToPrimitive,
166{
167    it.collect::<Unsorted<_>>().percentile_rank(value)
168}
169
170/// Compute the Atkinson Index on a stream of data.
171///
172/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
173/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
174/// Higher ε values give more weight to inequality at the lower end of the distribution.
175///
176/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
177#[inline]
178pub fn atkinson<I>(
179    it: I,
180    epsilon: f64,
181    precalc_mean: Option<f64>,
182    precalc_geometric_sum: Option<f64>,
183) -> Option<f64>
184where
185    I: Iterator,
186    <I as Iterator>::Item: PartialOrd + ToPrimitive + Send + Sync,
187{
188    it.collect::<Unsorted<_>>()
189        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
190}
191
192fn median_on_sorted<T>(data: &[T]) -> Option<f64>
193where
194    T: PartialOrd + ToPrimitive,
195{
196    Some(match data.len() {
197        // Empty slice case - return None early
198        0 => return None,
199        // Single element case - return that element converted to f64
200        1 => data.first()?.to_f64()?,
201        // Even length case - average the two middle elements
202        len if len.is_multiple_of(2) => {
203            let idx = len / 2;
204            // Safety: we know these indices are valid because we checked len is even and non-zero,
205            // so idx-1 and idx are valid indices into data
206            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
207            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
208            f64::midpoint(v1, v2)
209        }
210        // Odd length case - return the middle element
211        // Safety: we know the index is within bounds
212        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
213    })
214}
215
216fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
217where
218    T: Sync + PartialOrd + ToPrimitive,
219{
220    if data.is_empty() {
221        return None;
222    }
223    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
224
225    // Use adaptive parallel processing based on data size
226    let mut abs_diff_vec: Vec<f64> = if data.len() < PARALLEL_THRESHOLD {
227        // Sequential processing for small datasets
228        // Iterator collect enables TrustedLen optimization, eliminating per-element bounds checks
229        data.iter()
230            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
231            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
232            .collect()
233    } else {
234        // Parallel processing for large datasets
235        data.par_iter()
236            // SAFETY: to_f64() always returns Some for standard numeric types
237            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
238            .collect()
239    };
240
241    // Use select_nth_unstable to find the median in O(n) instead of O(n log n) full sort
242    let len = abs_diff_vec.len();
243    let mid = len / 2;
244    let cmp = |a: &f64, b: &f64| a.total_cmp(b);
245
246    abs_diff_vec.select_nth_unstable_by(mid, cmp);
247
248    if len.is_multiple_of(2) {
249        // Even length: need both mid-1 and mid elements
250        let right = abs_diff_vec[mid];
251        // The left partition [0..mid] contains elements <= abs_diff_vec[mid],
252        // so we can find the max of the left partition for mid-1
253        let left = abs_diff_vec[..mid]
254            .iter()
255            .max_by(|a, b| cmp(a, b))
256            .copied()?;
257        Some(f64::midpoint(left, right))
258    } else {
259        Some(abs_diff_vec[mid])
260    }
261}
262
263fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
264where
265    T: Sync + PartialOrd + ToPrimitive,
266{
267    let len = data.len();
268
269    // Early return for empty data
270    if len == 0 {
271        return None;
272    }
273
274    // Single element case: perfect equality, Gini = 0
275    if len == 1 {
276        return Some(0.0);
277    }
278
279    // Gini coefficient is only defined for non-negative distributions.
280    // Since data is sorted, check the first (smallest) element.
281    // SAFETY: len > 1 guaranteed by checks above
282    let first_val = unsafe { data.get_unchecked(0).0.to_f64().unwrap_unchecked() };
283    if first_val < 0.0 {
284        return None;
285    }
286
287    // Compute sum and weighted sum.
288    // When precalc_sum is provided, only compute weighted_sum in a single pass.
289    // When not provided, fuse both computations into a single pass over the data
290    // to halve cache pressure (following the fold/reduce pattern used in kurtosis).
291    let (sum, weighted_sum) = if let Some(precalc) = precalc_sum {
292        if precalc < 0.0 {
293            return None;
294        }
295        // Only need weighted_sum — single pass
296        let weighted_sum = if len < PARALLEL_THRESHOLD {
297            let mut weighted_sum = 0.0;
298            for (i, x) in data.iter().enumerate() {
299                // SAFETY: to_f64() always returns Some for standard numeric types
300                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
301                weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
302            }
303            weighted_sum
304        } else {
305            data.par_iter()
306                .enumerate()
307                .map(|(i, x)| {
308                    // SAFETY: to_f64() always returns Some for standard numeric types
309                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
310                    ((i + 1) as f64).mul_add(val, 0.0)
311                })
312                .sum()
313        };
314        (precalc, weighted_sum)
315    } else if len < PARALLEL_THRESHOLD {
316        // Fused single pass: compute both sum and weighted_sum together
317        let mut sum = 0.0;
318        let mut weighted_sum = 0.0;
319        for (i, x) in data.iter().enumerate() {
320            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
321            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
322            sum += val;
323            weighted_sum = ((i + 1) as f64).mul_add(val, weighted_sum);
324        }
325        (sum, weighted_sum)
326    } else {
327        // Fused parallel single pass using fold/reduce
328        data.par_iter()
329            .enumerate()
330            .fold(
331                || (0.0_f64, 0.0_f64),
332                |acc, (i, x)| {
333                    // SAFETY: to_f64() always returns Some for standard numeric types
334                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
335                    (acc.0 + val, ((i + 1) as f64).mul_add(val, acc.1))
336                },
337            )
338            .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
339    };
340
341    // If sum is zero, Gini is undefined
342    if sum == 0.0 {
343        return None;
344    }
345
346    // Compute Gini coefficient using the formula:
347    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
348    // where i is 1-indexed rank and y_i are sorted values
349    let n = len as f64;
350    let gini = 2.0f64.mul_add(weighted_sum / (n * sum), -(n + 1.0) / n);
351
352    Some(gini)
353}
354
355fn kurtosis_on_sorted<T>(
356    data: &[Partial<T>],
357    precalc_mean: Option<f64>,
358    precalc_variance: Option<f64>,
359) -> Option<f64>
360where
361    T: Sync + PartialOrd + ToPrimitive,
362{
363    let len = data.len();
364
365    // Need at least 4 elements for meaningful kurtosis
366    if len < 4 {
367        return None;
368    }
369
370    // Use pre-calculated mean if provided, otherwise compute it
371    let mean = if let Some(precalc) = precalc_mean {
372        precalc
373    } else {
374        let sum: f64 = if len < PARALLEL_THRESHOLD {
375            // Iterator sum enables auto-vectorization (SIMD) by the compiler
376            data.iter()
377                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
378                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
379                .sum()
380        } else {
381            data.par_iter()
382                // SAFETY: to_f64() always returns Some for standard numeric types
383                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
384                .sum()
385        };
386        sum / len as f64
387    };
388
389    // Compute variance_sq and fourth_power_sum
390    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
391    // Otherwise, we need to compute variance from the data
392    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
393        // Negative variance is invalid (possible floating-point rounding artifact)
394        if variance < 0.0 {
395            return None;
396        }
397        // Use pre-calculated variance: variance_sq = variance^2
398        let variance_sq = variance * variance;
399
400        // Still need to compute fourth_power_sum
401        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
402            let mut sum = 0.0;
403            for x in data {
404                // SAFETY: to_f64() always returns Some for standard numeric types
405                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
406                let diff = val - mean;
407                let diff_sq = diff * diff;
408                sum = diff_sq.mul_add(diff_sq, sum);
409            }
410            sum
411        } else {
412            data.par_iter()
413                .map(|x| {
414                    // SAFETY: to_f64() always returns Some for standard numeric types
415                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
416                    let diff = val - mean;
417                    let diff_sq = diff * diff;
418                    diff_sq * diff_sq
419                })
420                .sum()
421        };
422
423        (variance_sq, fourth_power_sum)
424    } else {
425        // Compute both variance_sum and fourth_power_sum
426        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
427            let mut variance_sum = 0.0;
428            let mut fourth_power_sum = 0.0;
429
430            for x in data {
431                // SAFETY: to_f64() always returns Some for standard numeric types
432                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
433                let diff = val - mean;
434                let diff_sq = diff * diff;
435                variance_sum += diff_sq;
436                fourth_power_sum = diff_sq.mul_add(diff_sq, fourth_power_sum);
437            }
438
439            (variance_sum, fourth_power_sum)
440        } else {
441            // Single pass computing both sums simultaneously
442            data.par_iter()
443                .fold(
444                    || (0.0_f64, 0.0_f64),
445                    |acc, x| {
446                        // SAFETY: to_f64() always returns Some for standard numeric types
447                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
448                        let diff = val - mean;
449                        let diff_sq = diff * diff;
450                        (acc.0 + diff_sq, diff_sq.mul_add(diff_sq, acc.1))
451                    },
452                )
453                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
454        };
455
456        let variance = variance_sum / len as f64;
457
458        // If variance is zero, all values are the same, kurtosis is undefined
459        if variance == 0.0 {
460            return None;
461        }
462
463        let variance_sq = variance * variance;
464        (variance_sq, fourth_power_sum)
465    };
466
467    // If variance_sq is zero, all values are the same, kurtosis is undefined
468    if variance_sq == 0.0 {
469        return None;
470    }
471
472    let n = len as f64;
473
474    // Sample excess kurtosis formula:
475    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
476    let denominator = (n - 1.0) * (n - 2.0) * (n - 3.0);
477    let adjustment = 3.0 * (n - 1.0) * (n - 1.0) / denominator;
478    let kurtosis = (n * (n + 1.0) * fourth_power_sum).mul_add(1.0 / (denominator * variance_sq), -adjustment);
479
480    Some(kurtosis)
481}
482
483fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
484where
485    T: PartialOrd + ToPrimitive,
486    V: PartialOrd + ToPrimitive,
487{
488    let len = data.len();
489
490    if len == 0 {
491        return None;
492    }
493
494    let value_f64 = value.to_f64()?;
495
496    // Binary search to find the position where value would be inserted
497    // This gives us the number of values <= value
498    let count_leq = data.binary_search_by(|x| {
499        x.0.to_f64()
500            .unwrap_or(f64::NAN)
501            .partial_cmp(&value_f64)
502            .unwrap_or(std::cmp::Ordering::Less)
503    });
504
505    let count = match count_leq {
506        Ok(idx) => {
507            // Value found at idx — use partition_point (O(log n)) to find the upper bound
508            // of equal values instead of a linear scan
509            let upper = data[idx + 1..].partition_point(|x| {
510                x.0.to_f64()
511                    .is_some_and(|v| v.total_cmp(&value_f64).is_le())
512            });
513            idx + 1 + upper
514        }
515        Err(idx) => idx, // Number of values less than value
516    };
517
518    // Percentile rank = (count / n) * 100
519    Some((count as f64 / len as f64) * 100.0)
520}
521
522fn atkinson_on_sorted<T>(
523    data: &[Partial<T>],
524    epsilon: f64,
525    precalc_mean: Option<f64>,
526    precalc_geometric_sum: Option<f64>,
527) -> Option<f64>
528where
529    T: Sync + PartialOrd + ToPrimitive,
530{
531    let len = data.len();
532
533    // Early return for empty data
534    if len == 0 {
535        return None;
536    }
537
538    // Single element case: perfect equality, Atkinson = 0
539    if len == 1 {
540        return Some(0.0);
541    }
542
543    // Epsilon must be non-negative
544    if epsilon < 0.0 {
545        return None;
546    }
547
548    // Use pre-calculated mean if provided, otherwise compute it
549    let mean = if let Some(precalc) = precalc_mean {
550        precalc
551    } else {
552        let sum: f64 = if len < PARALLEL_THRESHOLD {
553            // Iterator sum enables auto-vectorization (SIMD) by the compiler
554            data.iter()
555                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
556                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
557                .sum()
558        } else {
559            data.par_iter()
560                // SAFETY: to_f64() always returns Some for standard numeric types
561                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
562                .sum()
563        };
564        sum / len as f64
565    };
566
567    // If mean is zero, Atkinson is undefined
568    if mean == 0.0 {
569        return None;
570    }
571
572    // Handle special case: epsilon = 1 (uses geometric mean)
573    if (epsilon - 1.0).abs() < 1e-10 {
574        // A_1 = 1 - (geometric_mean / mean)
575        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
576            precalc
577        } else if len < PARALLEL_THRESHOLD {
578            let mut sum = 0.0;
579            for x in data {
580                // SAFETY: to_f64() always returns Some for standard numeric types
581                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
582                if val <= 0.0 {
583                    // Geometric mean undefined for non-positive values
584                    return None;
585                }
586                sum += val.ln();
587            }
588            sum
589        } else {
590            data.par_iter()
591                .map(|x| {
592                    // SAFETY: to_f64() always returns Some for standard numeric types
593                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
594                    if val <= 0.0 {
595                        return f64::NAN;
596                    }
597                    val.ln()
598                })
599                .sum()
600        };
601
602        if geometric_sum.is_nan() {
603            return None;
604        }
605
606        let geometric_mean = (geometric_sum / len as f64).exp();
607        return Some(1.0 - geometric_mean / mean);
608    }
609
610    // General case: epsilon != 1
611    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
612    let exponent = 1.0 - epsilon;
613
614    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
615        let mut sum = 0.0;
616        for x in data {
617            // SAFETY: to_f64() always returns Some for standard numeric types
618            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
619            if val < 0.0 {
620                // Negative values with non-integer exponent are undefined
621                return None;
622            }
623            let ratio = val / mean;
624            sum += ratio.powf(exponent);
625        }
626        sum
627    } else {
628        data.par_iter()
629            .map(|x| {
630                // SAFETY: to_f64() always returns Some for standard numeric types
631                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
632                if val < 0.0 {
633                    return f64::NAN;
634                }
635                let ratio = val / mean;
636                ratio.powf(exponent)
637            })
638            .sum()
639    };
640
641    if sum_powered.is_nan() || sum_powered <= 0.0 {
642        return None;
643    }
644
645    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
646    Some(atkinson)
647}
648
649/// Selection algorithm to find the k-th smallest element in O(n) average time.
650/// This is an implementation of quickselect algorithm.
651#[cfg(test)]
652fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
653where
654    T: PartialOrd,
655{
656    if data.is_empty() || k >= data.len() {
657        return None;
658    }
659
660    let mut left = 0;
661    let mut right = data.len() - 1;
662
663    loop {
664        if left == right {
665            return Some(&data[left].0);
666        }
667
668        // Use median-of-three pivot selection for better performance
669        let pivot_idx = median_of_three_pivot(data, left, right);
670        let pivot_idx = partition(data, left, right, pivot_idx);
671
672        match k.cmp(&pivot_idx) {
673            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
674            std::cmp::Ordering::Less => right = pivot_idx - 1,
675            std::cmp::Ordering::Greater => left = pivot_idx + 1,
676        }
677    }
678}
679
680/// Select the median of three elements as pivot for better quickselect performance
681#[cfg(test)]
682fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
683where
684    T: PartialOrd,
685{
686    let mid = left + (right - left) / 2;
687
688    if data[left] <= data[mid] {
689        if data[mid] <= data[right] {
690            mid
691        } else if data[left] <= data[right] {
692            right
693        } else {
694            left
695        }
696    } else if data[left] <= data[right] {
697        left
698    } else if data[mid] <= data[right] {
699        right
700    } else {
701        mid
702    }
703}
704
705/// Partition function for quickselect
706#[cfg(test)]
707fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
708where
709    T: PartialOrd,
710{
711    // Move pivot to end
712    data.swap(pivot_idx, right);
713    let mut store_idx = left;
714
715    // Move all elements smaller than pivot to the left
716    // Cache pivot position for better cache locality (access data[right] directly each time)
717    for i in left..right {
718        // Safety: i, store_idx, and right are guaranteed to be in bounds
719        // Compare directly with pivot at data[right] - compiler should optimize this access
720        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
721            data.swap(i, store_idx);
722            store_idx += 1;
723        }
724    }
725
726    // Move pivot to its final place
727    data.swap(store_idx, right);
728    store_idx
729}
730
731// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
732// It divides data into quarters based on the length n = 4k + r where r is remainder.
733// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
734fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
735where
736    T: PartialOrd + ToPrimitive,
737{
738    let len = data.len();
739
740    // Early return for small arrays
741    match len {
742        0..=2 => return None,
743        3 => {
744            return Some(
745                // SAFETY: We know these indices are valid because len == 3
746                unsafe {
747                    (
748                        data.get_unchecked(0).0.to_f64()?,
749                        data.get_unchecked(1).0.to_f64()?,
750                        data.get_unchecked(2).0.to_f64()?,
751                    )
752                },
753            );
754        }
755        _ => {}
756    }
757
758    // Calculate k and remainder in one division
759    let k = len / 4;
760    let remainder = len % 4;
761
762    // SAFETY: All index calculations below are guaranteed to be in bounds
763    // because we've verified len >= 4 above and k is len/4
764    unsafe {
765        Some(match remainder {
766            0 => {
767                // Let data = {x_i}_{i=0..4k} where k is positive integer.
768                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
769                // If we divide data into two parts {x_i < q2} as L and
770                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
771                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
772                // =============
773                // Simply put: Length is multiple of 4 (4k)
774                // q1 = (x_{k-1} + x_k) / 2
775                // q2 = (x_{2k-1} + x_{2k}) / 2
776                // q3 = (x_{3k-1} + x_{3k}) / 2
777                let q1 = f64::midpoint(
778                    data.get_unchecked(k - 1).0.to_f64()?,
779                    data.get_unchecked(k).0.to_f64()?,
780                );
781                let q2 = f64::midpoint(
782                    data.get_unchecked(2 * k - 1).0.to_f64()?,
783                    data.get_unchecked(2 * k).0.to_f64()?,
784                );
785                let q3 = f64::midpoint(
786                    data.get_unchecked(3 * k - 1).0.to_f64()?,
787                    data.get_unchecked(3 * k).0.to_f64()?,
788                );
789                (q1, q2, q3)
790            }
791            1 => {
792                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
793                // Median q2 = x_{2k}.
794                // If we divide data other than q2 into two parts {x_i < q2}
795                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
796                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
797                // =============
798                // Simply put: Length is 4k + 1
799                // q1 = (x_{k-1} + x_k) / 2
800                // q2 = x_{2k}
801                // q3 = (x_{3k} + x_{3k+1}) / 2
802                let q1 = f64::midpoint(
803                    data.get_unchecked(k - 1).0.to_f64()?,
804                    data.get_unchecked(k).0.to_f64()?,
805                );
806                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
807                let q3 = f64::midpoint(
808                    data.get_unchecked(3 * k).0.to_f64()?,
809                    data.get_unchecked(3 * k + 1).0.to_f64()?,
810                );
811                (q1, q2, q3)
812            }
813            2 => {
814                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
815                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
816                // If we divide data into two parts {x_i < q2} as L and
817                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
818                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
819                // =============
820                // Simply put: Length is 4k + 2
821                // q1 = x_k
822                // q2 = (x_{2k} + x_{2k+1}) / 2
823                // q3 = x_{3k+1}
824                let q1 = data.get_unchecked(k).0.to_f64()?;
825                let q2 = f64::midpoint(
826                    data.get_unchecked(2 * k).0.to_f64()?,
827                    data.get_unchecked(2 * k + 1).0.to_f64()?,
828                );
829                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
830                (q1, q2, q3)
831            }
832            _ => {
833                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
834                // Median q2 = x_{2k+1}.
835                // If we divide data other than q2 into two parts {x_i < q2}
836                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
837                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
838                // =============
839                // Simply put: Length is 4k + 3
840                // q1 = x_k
841                // q2 = x_{2k+1}
842                // q3 = x_{3k+2}
843                let q1 = data.get_unchecked(k).0.to_f64()?;
844                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
845                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
846                (q1, q2, q3)
847            }
848        })
849    }
850}
851
852/// Zero-copy quartiles computation using index-based selection.
853/// This avoids copying data by working with an array of indices.
854///
855/// Uses `select_nth_unstable_by` on the indices array, which partitions in-place.
856/// After selecting position p, elements at indices [0..p] are <= the p-th element
857/// and elements at [p+1..] are >= it. By selecting positions in ascending order,
858/// each subsequent selection only needs to search within the right partition,
859/// avoiding redundant O(n) resets.
860fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
861where
862    T: PartialOrd + ToPrimitive,
863{
864    let len = data.len();
865
866    // Early return for small arrays
867    match len {
868        0..=2 => return None,
869        3 => {
870            let mut indices: Vec<usize> = (0..3).collect();
871            let cmp =
872                |a: &usize, b: &usize| data[*a].partial_cmp(&data[*b]).unwrap_or(std::cmp::Ordering::Less);
873            indices.sort_unstable_by(cmp);
874            let min_val = data[indices[0]].0.to_f64()?;
875            let med_val = data[indices[1]].0.to_f64()?;
876            let max_val = data[indices[2]].0.to_f64()?;
877            return Some((min_val, med_val, max_val));
878        }
879        _ => {}
880    }
881
882    let k = len / 4;
883    let remainder = len % 4;
884
885    let mut indices: Vec<usize> = (0..len).collect();
886    let cmp = |a: &usize, b: &usize| {
887        data[*a]
888            .partial_cmp(&data[*b])
889            .unwrap_or(std::cmp::Ordering::Less)
890    };
891
892    // Collect the unique positions we need in ascending order.
893    // By selecting in ascending order, each select_nth_unstable_by partitions
894    // the array so subsequent selections operate on progressively smaller slices.
895    // We deduplicate because adjacent quartile boundaries can overlap for small k.
896    let raw_positions: Vec<usize> = match remainder {
897        0 => vec![k - 1, k, 2 * k - 1, 2 * k, 3 * k - 1, 3 * k],
898        1 => vec![k - 1, k, 2 * k, 3 * k, 3 * k + 1],
899        2 => vec![k, 2 * k, 2 * k + 1, 3 * k + 1],
900        _ => vec![k, 2 * k + 1, 3 * k + 2],
901    };
902
903    let mut unique_positions = raw_positions.clone();
904    unique_positions.dedup();
905
906    // Select each unique position in ascending order, narrowing the search range
907    let mut start = 0;
908    for &pos in &unique_positions {
909        indices[start..].select_nth_unstable_by(pos - start, &cmp);
910        start = pos + 1;
911    }
912
913    // Now read all needed values (including duplicates) from the partitioned indices
914    let values: Vec<f64> = raw_positions
915        .iter()
916        .map(|&pos| data[indices[pos]].0.to_f64())
917        .collect::<Option<Vec<_>>>()?;
918
919    match remainder {
920        0 => {
921            let q1 = f64::midpoint(values[0], values[1]);
922            let q2 = f64::midpoint(values[2], values[3]);
923            let q3 = f64::midpoint(values[4], values[5]);
924            Some((q1, q2, q3))
925        }
926        1 => {
927            let q1 = f64::midpoint(values[0], values[1]);
928            let q2 = values[2];
929            let q3 = f64::midpoint(values[3], values[4]);
930            Some((q1, q2, q3))
931        }
932        2 => {
933            let q1 = values[0];
934            let q2 = f64::midpoint(values[1], values[2]);
935            let q3 = values[3];
936            Some((q1, q2, q3))
937        }
938        _ => Some((values[0], values[1], values[2])),
939    }
940}
941
942fn mode_on_sorted<T, I>(it: I) -> Option<T>
943where
944    T: PartialOrd,
945    I: Iterator<Item = T>,
946{
947    use std::cmp::Ordering;
948
949    // This approach to computing the mode works very nicely when the
950    // number of samples is large and is close to its cardinality.
951    // In other cases, a hashmap would be much better.
952    // But really, how can we know this when given an arbitrary stream?
953    // Might just switch to a hashmap to track frequencies. That would also
954    // be generally useful for discovering the cardinality of a sample.
955    let (mut mode, mut next) = (None, None);
956    let (mut mode_count, mut next_count) = (0usize, 0usize);
957    for x in it {
958        if mode.as_ref() == Some(&x) {
959            mode_count += 1;
960        } else if next.as_ref() == Some(&x) {
961            next_count += 1;
962        } else {
963            next = Some(x);
964            next_count = 0;
965        }
966
967        match next_count.cmp(&mode_count) {
968            Ordering::Greater => {
969                mode = next;
970                mode_count = next_count;
971                next = None;
972                next_count = 0;
973            }
974            Ordering::Equal => {
975                mode = None;
976                mode_count = 0;
977            }
978            Ordering::Less => {}
979        }
980    }
981    mode
982}
983
984/// Computes both modes and antimodes from a sorted slice of values.
985/// This version works with references to avoid unnecessary cloning.
986///
987/// # Arguments
988///
989/// * `data` - A sorted slice of values
990///
991/// # Notes
992///
993/// - Mode is the most frequently occurring value(s)
994/// - Antimode is the least frequently occurring value(s)
995/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
996/// - For empty slices, returns empty vectors and zero counts
997/// - For single value slices, returns that value as the mode and empty antimode
998/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
999///
1000/// # Returns
1001///
1002/// A tuple containing:
1003/// * Modes information: `(Vec<T>, usize, u32)` where:
1004///   - Vec<T>: Vector containing the mode values
1005///   - usize: Number of modes found
1006///   - u32: Frequency/count of the mode values
1007/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1008///   - Vec<T>: Vector containing up to 10 antimode values
1009///   - usize: Total number of antimodes
1010///   - u32: Frequency/count of the antimode values
1011#[allow(clippy::type_complexity)]
1012#[inline]
1013fn modes_and_antimodes_on_sorted_slice<T>(
1014    data: &[Partial<T>],
1015) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1016where
1017    T: PartialOrd + Clone,
1018{
1019    let size = data.len();
1020
1021    // Early return for empty slice
1022    if size == 0 {
1023        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1024    }
1025
1026    // Estimate capacity using integer square root of size
1027    // Integer square root using Newton's method (faster than floating point sqrt)
1028    let mut x = size;
1029    let mut y = x.div_ceil(2);
1030    while y < x {
1031        x = y;
1032        y = usize::midpoint(x, size / x);
1033    }
1034    let sqrt_size = x;
1035    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1036
1037    let mut current_value = &data[0].0;
1038    let mut current_count = 1;
1039    let mut highest_count = 1;
1040    let mut lowest_count = u32::MAX;
1041
1042    // Count consecutive runs - optimized to reduce allocations
1043    for x in data.iter().skip(1) {
1044        if x.0 == *current_value {
1045            current_count += 1;
1046            highest_count = highest_count.max(current_count);
1047        } else {
1048            runs.push((current_value, current_count));
1049            lowest_count = lowest_count.min(current_count);
1050            current_value = &x.0;
1051            current_count = 1;
1052        }
1053    }
1054    runs.push((current_value, current_count));
1055    lowest_count = lowest_count.min(current_count);
1056
1057    // Early return if only one unique value
1058    if runs.len() == 1 {
1059        let (val, count) = runs.pop().unwrap();
1060        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1061    }
1062
1063    // Special case: if all values appear exactly once
1064    if highest_count == 1 {
1065        let antimodes_count = runs.len().min(10);
1066        let total_count = runs.len();
1067        let mut antimodes = Vec::with_capacity(antimodes_count);
1068        for (val, _) in runs.into_iter().take(antimodes_count) {
1069            antimodes.push(val.clone());
1070        }
1071        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1072        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1073    }
1074
1075    // Collect modes and antimodes directly in a single pass, cloning values immediately
1076    // instead of collecting indices first and then cloning in a second pass
1077    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1078    let estimated_antimodes = 10.min(runs.len());
1079
1080    let mut modes_result = Vec::with_capacity(estimated_modes);
1081    let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
1082    let mut mode_count = 0;
1083    let mut antimodes_count = 0;
1084    let mut antimodes_collected = 0_u32;
1085
1086    for (val, count) in &runs {
1087        if *count == highest_count {
1088            modes_result.push((*val).clone());
1089            mode_count += 1;
1090        }
1091        if *count == lowest_count {
1092            antimodes_count += 1;
1093            if antimodes_collected < 10 {
1094                antimodes_result.push((*val).clone());
1095                antimodes_collected += 1;
1096            }
1097        }
1098    }
1099
1100    (
1101        (modes_result, mode_count, highest_count),
1102        (antimodes_result, antimodes_count, lowest_count),
1103    )
1104}
1105
1106/// A commutative data structure for lazily sorted sequences of data.
1107///
1108/// The sort does not occur until statistics need to be computed.
1109///
1110/// Note that this works on types that do not define a total ordering like
1111/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1112/// is returned.
1113#[allow(clippy::unsafe_derive_deserialize)]
1114#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
1115pub struct Unsorted<T> {
1116    sorted: bool,
1117    data: Vec<Partial<T>>,
1118}
1119
1120impl<T: PartialOrd + Send> Unsorted<T> {
1121    /// Create initial empty state.
1122    #[inline]
1123    #[must_use]
1124    pub fn new() -> Unsorted<T> {
1125        Default::default()
1126    }
1127
1128    /// Add a new element to the set.
1129    #[allow(clippy::inline_always)]
1130    #[inline(always)]
1131    pub fn add(&mut self, v: T) {
1132        self.sorted = false;
1133        self.data.push(Partial(v));
1134    }
1135
1136    /// Return the number of data points.
1137    #[inline]
1138    #[must_use]
1139    pub const fn len(&self) -> usize {
1140        self.data.len()
1141    }
1142
1143    #[inline]
1144    #[must_use]
1145    pub const fn is_empty(&self) -> bool {
1146        self.data.is_empty()
1147    }
1148
1149    #[inline]
1150    fn sort(&mut self) {
1151        if !self.sorted {
1152            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1153            if self.data.len() < PARALLEL_THRESHOLD {
1154                self.data.sort_unstable();
1155            } else {
1156                self.data.par_sort_unstable();
1157            }
1158            self.sorted = true;
1159        }
1160    }
1161
1162    #[inline]
1163    const fn already_sorted(&mut self) {
1164        self.sorted = true;
1165    }
1166
1167    /// Add multiple elements efficiently
1168    #[inline]
1169    pub fn add_bulk(&mut self, values: Vec<T>) {
1170        self.sorted = false;
1171        self.data.reserve(values.len());
1172        self.data.extend(values.into_iter().map(Partial));
1173    }
1174
1175    /// Shrink capacity to fit current data
1176    #[inline]
1177    pub fn shrink_to_fit(&mut self) {
1178        self.data.shrink_to_fit();
1179    }
1180
1181    /// Create with specific capacity
1182    #[inline]
1183    #[must_use]
1184    pub fn with_capacity(capacity: usize) -> Self {
1185        Unsorted {
1186            sorted: true,
1187            data: Vec::with_capacity(capacity),
1188        }
1189    }
1190
1191    /// Add a value assuming it's greater than all existing values
1192    #[inline]
1193    pub fn push_ascending(&mut self, value: T) {
1194        if let Some(last) = self.data.last() {
1195            debug_assert!(last.0 <= value, "Value must be >= than last element");
1196        }
1197        self.data.push(Partial(value));
1198        // Data remains sorted
1199    }
1200}
1201
1202impl<T: PartialOrd + PartialEq + Clone + Send + Sync> Unsorted<T> {
1203    #[inline]
1204    /// Returns the cardinality of the data.
1205    /// Set `sorted` to `true` if the data is already sorted.
1206    /// Set `parallel_threshold` to `0` to force sequential processing.
1207    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1208    /// Set `parallel_threshold` to `2` to force parallel processing.
1209    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1210    /// greater than the default threshold of `10_000`.
1211    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1212        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1213        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1214
1215        let len = self.data.len();
1216        match len {
1217            0 => return 0,
1218            1 => return 1,
1219            _ => {}
1220        }
1221
1222        if sorted {
1223            self.already_sorted();
1224        } else {
1225            self.sort();
1226        }
1227
1228        let use_parallel = parallel_threshold != 0
1229            && (parallel_threshold == 1
1230                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1231
1232        if use_parallel {
1233            // Parallel processing using chunks
1234            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1235            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1236            let chunk_info: ChunkInfo<'_, T> = self
1237                .data
1238                .par_chunks(CHUNK_SIZE)
1239                .map(|chunk| {
1240                    // Count unique elements within this chunk
1241                    let mut count = u64::from(!chunk.is_empty());
1242                    for [a, b] in chunk.array_windows::<2>() {
1243                        if a != b {
1244                            count += 1;
1245                        }
1246                    }
1247                    (count, chunk.first(), chunk.last())
1248                })
1249                .collect();
1250
1251            // Combine results, checking boundaries between chunks
1252            let mut total = 0;
1253            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1254                total += count;
1255
1256                // Check boundary with previous chunk
1257                if i > 0
1258                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1259                    && prev_last == curr_first
1260                {
1261                    total -= 1; // Deduct 1 if boundary values are equal
1262                }
1263            }
1264
1265            total
1266        } else {
1267            // Sequential processing
1268
1269            // the statement below is equivalent to:
1270            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1271            let mut count = u64::from(!self.data.is_empty());
1272
1273            for [a, b] in self.data.array_windows::<2>() {
1274                if a != b {
1275                    count += 1;
1276                }
1277            }
1278            count
1279        }
1280    }
1281}
1282
1283impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1284    /// Returns the mode of the data.
1285    #[inline]
1286    pub fn mode(&mut self) -> Option<T> {
1287        if self.data.is_empty() {
1288            return None;
1289        }
1290        self.sort();
1291        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1292    }
1293
1294    /// Returns the modes of the data.
1295    /// Note that there is also a `frequency::mode()` function that return one mode
1296    /// with the highest frequency. If there is a tie, it returns None.
1297    #[inline]
1298    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1299        if self.data.is_empty() {
1300            return (Vec::new(), 0, 0);
1301        }
1302        self.sort();
1303        modes_and_antimodes_on_sorted_slice(&self.data).0
1304    }
1305
1306    /// Returns the antimodes of the data.
1307    /// `antimodes_result` only returns the first 10 antimodes
1308    #[inline]
1309    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1310        if self.data.is_empty() {
1311            return (Vec::new(), 0, 0);
1312        }
1313        self.sort();
1314        modes_and_antimodes_on_sorted_slice(&self.data).1
1315    }
1316
1317    /// Returns the modes and antimodes of the data.
1318    /// `antimodes_result` only returns the first 10 antimodes
1319    #[allow(clippy::type_complexity)]
1320    #[inline]
1321    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1322        if self.data.is_empty() {
1323            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1324        }
1325        self.sort();
1326        modes_and_antimodes_on_sorted_slice(&self.data)
1327    }
1328}
1329
1330impl Unsorted<Vec<u8>> {
1331    /// Add a byte slice, converting to `Vec<u8>` internally.
1332    ///
1333    /// This is a convenience method that avoids requiring the caller to call
1334    /// `.to_vec()` before `add()`. The allocation still occurs internally,
1335    /// but the API is cleaner and opens the door for future optimizations
1336    /// (e.g., frequency-map deduplication for high-cardinality data).
1337    #[allow(clippy::inline_always)]
1338    #[inline(always)]
1339    pub fn add_bytes(&mut self, v: &[u8]) {
1340        self.sorted = false;
1341        self.data.push(Partial(v.to_vec()));
1342    }
1343}
1344
1345impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1346    /// Returns the median of the data.
1347    #[inline]
1348    pub fn median(&mut self) -> Option<f64> {
1349        if self.data.is_empty() {
1350            return None;
1351        }
1352        self.sort();
1353        median_on_sorted(&self.data)
1354    }
1355}
1356
1357impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1358    /// Returns the Median Absolute Deviation (MAD) of the data.
1359    #[inline]
1360    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1361        if self.data.is_empty() {
1362            return None;
1363        }
1364        if existing_median.is_none() {
1365            self.sort();
1366        }
1367        mad_on_sorted(&self.data, existing_median)
1368    }
1369}
1370
1371impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1372    /// Returns the quartiles of the data using the traditional sorting approach.
1373    ///
1374    /// This method sorts the data first and then computes quartiles.
1375    /// Time complexity: O(n log n)
1376    #[inline]
1377    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1378        if self.data.is_empty() {
1379            return None;
1380        }
1381        self.sort();
1382        quartiles_on_sorted(&self.data)
1383    }
1384}
1385
1386impl<T: PartialOrd + ToPrimitive + Send + Sync> Unsorted<T> {
1387    /// Returns the Gini Coefficient of the data.
1388    ///
1389    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1390    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1391    /// Time complexity: O(n log n)
1392    #[inline]
1393    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1394        if self.data.is_empty() {
1395            return None;
1396        }
1397        self.sort();
1398        gini_on_sorted(&self.data, precalc_sum)
1399    }
1400
1401    /// Returns the kurtosis (excess kurtosis) of the data.
1402    ///
1403    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1404    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1405    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1406    /// Time complexity: O(n log n)
1407    #[inline]
1408    pub fn kurtosis(
1409        &mut self,
1410        precalc_mean: Option<f64>,
1411        precalc_variance: Option<f64>,
1412    ) -> Option<f64> {
1413        if self.data.is_empty() {
1414            return None;
1415        }
1416        self.sort();
1417        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1418    }
1419
1420    /// Returns the percentile rank of a value in the data.
1421    ///
1422    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1423    /// values, returns 0.0. If greater than all, returns 100.0.
1424    /// This method sorts the data first and then computes the percentile rank.
1425    /// Time complexity: O(n log n)
1426    #[inline]
1427    #[allow(clippy::needless_pass_by_value)]
1428    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1429    where
1430        V: PartialOrd + ToPrimitive,
1431    {
1432        if self.data.is_empty() {
1433            return None;
1434        }
1435        self.sort();
1436        percentile_rank_on_sorted(&self.data, &value)
1437    }
1438
1439    /// Returns the Atkinson Index of the data.
1440    ///
1441    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1442    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1443    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1444    /// This method sorts the data first and then computes the Atkinson index.
1445    /// Time complexity: O(n log n)
1446    ///
1447    /// # Arguments
1448    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1449    ///   - 0.0: No inequality aversion (returns 0)
1450    ///   - 0.5: Moderate aversion
1451    ///   - 1.0: Uses geometric mean (special case)
1452    ///   - 2.0: High aversion
1453    /// * `precalc_mean` - Optional pre-calculated mean
1454    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1455    #[inline]
1456    pub fn atkinson(
1457        &mut self,
1458        epsilon: f64,
1459        precalc_mean: Option<f64>,
1460        precalc_geometric_sum: Option<f64>,
1461    ) -> Option<f64> {
1462        if self.data.is_empty() {
1463            return None;
1464        }
1465        self.sort();
1466        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1467    }
1468}
1469
1470impl<T: PartialOrd + ToPrimitive + Clone + Send> Unsorted<T> {
1471    /// Returns the quartiles of the data using selection algorithm.
1472    ///
1473    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1474    /// in O(n) average time complexity instead of O(n log n) sorting.
1475    /// Requires T to implement Clone to create a working copy of the data.
1476    ///
1477    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1478    /// is often slower than the sorting-based approach for small to medium datasets due to:
1479    /// - Need to find multiple order statistics (3 separate quickselect calls)
1480    /// - Overhead of copying data to avoid mutation
1481    /// - Rayon's highly optimized parallel sorting
1482    #[inline]
1483    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1484        if self.data.is_empty() {
1485            return None;
1486        }
1487        // Use zero-copy approach (indices-based) to avoid cloning all elements
1488        quartiles_with_zero_copy_selection(&self.data)
1489    }
1490}
1491
1492impl<T: PartialOrd + ToPrimitive + Send> Unsorted<T> {
1493    /// Returns the quartiles using zero-copy selection algorithm.
1494    ///
1495    /// This implementation avoids copying data by working with indices instead,
1496    /// providing better performance than the clone-based selection approach.
1497    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1498    #[inline]
1499    #[must_use]
1500    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1501        if self.data.is_empty() {
1502            return None;
1503        }
1504        quartiles_with_zero_copy_selection(&self.data)
1505    }
1506}
1507
1508impl<T: PartialOrd + Send> Commute for Unsorted<T> {
1509    #[inline]
1510    fn merge(&mut self, mut v: Unsorted<T>) {
1511        if v.is_empty() {
1512            return;
1513        }
1514
1515        self.sorted = false;
1516        // we use std::mem::take to avoid unnecessary allocations
1517        self.data.extend(std::mem::take(&mut v.data));
1518    }
1519}
1520
1521impl<T: PartialOrd> Default for Unsorted<T> {
1522    #[inline]
1523    fn default() -> Unsorted<T> {
1524        Unsorted {
1525            data: Vec::with_capacity(16),
1526            sorted: true, // empty is sorted
1527        }
1528    }
1529}
1530
1531impl<T: PartialOrd + Send> FromIterator<T> for Unsorted<T> {
1532    #[inline]
1533    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1534        let mut v = Unsorted::new();
1535        v.extend(it);
1536        v
1537    }
1538}
1539
1540impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1541    #[inline]
1542    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1543        self.sorted = false;
1544        self.data.extend(it.into_iter().map(Partial));
1545    }
1546}
1547
1548fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1549where
1550    T: PartialOrd + Clone,
1551{
1552    let len = data.len();
1553
1554    // Early return for empty array or invalid percentiles
1555    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1556        return None;
1557    }
1558
1559    // Optimize: Check if percentiles are already sorted and unique
1560    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1561        // Single or empty percentile - no need to sort/dedup
1562        percentiles.to_vec()
1563    } else {
1564        // Check if already sorted and unique (common case)
1565        let is_sorted_unique = percentiles.array_windows::<2>().all(|[a, b]| a < b);
1566
1567        if is_sorted_unique {
1568            // Already sorted and unique, use directly without cloning
1569            percentiles.to_vec()
1570        } else {
1571            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1572            let mut seen = [false; 101];
1573            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1574            for &p in percentiles {
1575                if !seen[p as usize] {
1576                    seen[p as usize] = true;
1577                    sorted_unique.push(p);
1578                }
1579            }
1580            sorted_unique.sort_unstable();
1581            sorted_unique
1582        }
1583    };
1584
1585    let mut results = Vec::with_capacity(unique_percentiles.len());
1586
1587    // SAFETY: All index calculations below are guaranteed to be in bounds
1588    // because we've verified len > 0 and the rank calculation ensures
1589    // the index is within bounds
1590    unsafe {
1591        for &p in &unique_percentiles {
1592            // Calculate the ordinal rank using nearest-rank method
1593            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1594            // n = ⌈(P/100) × N⌉
1595            #[allow(clippy::cast_sign_loss)]
1596            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1597
1598            // Convert to 0-based index
1599            let idx = rank.saturating_sub(1);
1600
1601            // Get the value at that rank and extract the inner value
1602            results.push(data.get_unchecked(idx).0.clone());
1603        }
1604    }
1605
1606    Some(results)
1607}
1608
1609impl<T: PartialOrd + Clone + Send> Unsorted<T> {
1610    /// Returns the requested percentiles of the data.
1611    ///
1612    /// Uses the nearest-rank method to compute percentiles.
1613    /// Each returned value is an actual value from the dataset.
1614    ///
1615    /// # Arguments
1616    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1617    ///
1618    /// # Returns
1619    /// * `None` if the data is empty or if any percentile is > 100
1620    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1621    ///
1622    /// # Example
1623    /// ```
1624    /// use stats::Unsorted;
1625    /// let mut data = Unsorted::new();
1626    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1627    /// let percentiles = vec![25, 50, 75];
1628    /// let results = data.custom_percentiles(&percentiles).unwrap();
1629    /// assert_eq!(results, vec![3, 5, 8]);
1630    /// ```
1631    #[inline]
1632    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1633        if self.data.is_empty() {
1634            return None;
1635        }
1636        self.sort();
1637        custom_percentiles_on_sorted(&self.data, percentiles)
1638    }
1639}
1640
1641#[cfg(test)]
1642mod test {
1643    use super::*;
1644
1645    #[test]
1646    fn test_cardinality_empty() {
1647        let mut unsorted: Unsorted<i32> = Unsorted::new();
1648        assert_eq!(unsorted.cardinality(false, 1), 0);
1649    }
1650
1651    #[test]
1652    fn test_cardinality_single_element() {
1653        let mut unsorted = Unsorted::new();
1654        unsorted.add(5);
1655        assert_eq!(unsorted.cardinality(false, 1), 1);
1656    }
1657
1658    #[test]
1659    fn test_cardinality_unique_elements() {
1660        let mut unsorted = Unsorted::new();
1661        unsorted.extend(vec![1, 2, 3, 4, 5]);
1662        assert_eq!(unsorted.cardinality(false, 1), 5);
1663    }
1664
1665    #[test]
1666    fn test_cardinality_duplicate_elements() {
1667        let mut unsorted = Unsorted::new();
1668        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1669        assert_eq!(unsorted.cardinality(false, 1), 4);
1670    }
1671
1672    #[test]
1673    fn test_cardinality_all_same() {
1674        let mut unsorted = Unsorted::new();
1675        unsorted.extend(vec![1; 100]);
1676        assert_eq!(unsorted.cardinality(false, 1), 1);
1677    }
1678
1679    #[test]
1680    fn test_cardinality_large_range() {
1681        let mut unsorted = Unsorted::new();
1682        unsorted.extend(0..1_000_000);
1683        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1684    }
1685
1686    #[test]
1687    fn test_cardinality_large_range_sequential() {
1688        let mut unsorted = Unsorted::new();
1689        unsorted.extend(0..1_000_000);
1690        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1691    }
1692
1693    #[test]
1694    fn test_cardinality_presorted() {
1695        let mut unsorted = Unsorted::new();
1696        unsorted.extend(vec![1, 2, 3, 4, 5]);
1697        unsorted.sort();
1698        assert_eq!(unsorted.cardinality(true, 1), 5);
1699    }
1700
1701    #[test]
1702    fn test_cardinality_float() {
1703        let mut unsorted = Unsorted::new();
1704        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1705        assert_eq!(unsorted.cardinality(false, 1), 4);
1706    }
1707
1708    #[test]
1709    fn test_cardinality_string() {
1710        let mut unsorted = Unsorted::new();
1711        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1712        assert_eq!(unsorted.cardinality(false, 1), 3);
1713    }
1714
1715    #[test]
1716    fn test_quartiles_selection_vs_sorted() {
1717        // Test that selection-based quartiles gives same results as sorting-based
1718        let test_cases = vec![
1719            vec![3, 5, 7, 9],
1720            vec![3, 5, 7],
1721            vec![1, 2, 7, 11],
1722            vec![3, 5, 7, 9, 12],
1723            vec![2, 2, 3, 8, 10],
1724            vec![3, 5, 7, 9, 12, 20],
1725            vec![0, 2, 4, 8, 10, 11],
1726            vec![3, 5, 7, 9, 12, 20, 21],
1727            vec![1, 5, 6, 6, 7, 10, 19],
1728        ];
1729
1730        for test_case in test_cases {
1731            let mut unsorted1 = Unsorted::new();
1732            let mut unsorted2 = Unsorted::new();
1733            let mut unsorted3 = Unsorted::new();
1734            unsorted1.extend(test_case.clone());
1735            unsorted2.extend(test_case.clone());
1736            unsorted3.extend(test_case.clone());
1737
1738            let result_sorted = unsorted1.quartiles();
1739            let result_selection = unsorted2.quartiles_with_selection();
1740            let result_zero_copy = unsorted3.quartiles_zero_copy();
1741
1742            assert_eq!(
1743                result_sorted, result_selection,
1744                "Selection mismatch for test case: {:?}",
1745                test_case
1746            );
1747            assert_eq!(
1748                result_sorted, result_zero_copy,
1749                "Zero-copy mismatch for test case: {:?}",
1750                test_case
1751            );
1752        }
1753    }
1754
1755    #[test]
1756    fn test_quartiles_with_selection_small() {
1757        // Test edge cases for selection-based quartiles
1758        let mut unsorted: Unsorted<i32> = Unsorted::new();
1759        assert_eq!(unsorted.quartiles_with_selection(), None);
1760
1761        let mut unsorted = Unsorted::new();
1762        unsorted.extend(vec![1, 2]);
1763        assert_eq!(unsorted.quartiles_with_selection(), None);
1764
1765        let mut unsorted = Unsorted::new();
1766        unsorted.extend(vec![1, 2, 3]);
1767        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1768    }
1769
1770    #[test]
1771    fn test_quickselect() {
1772        let data = vec![
1773            Partial(3),
1774            Partial(1),
1775            Partial(4),
1776            Partial(1),
1777            Partial(5),
1778            Partial(9),
1779            Partial(2),
1780            Partial(6),
1781        ];
1782
1783        // Test finding different positions
1784        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1785        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1786        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1787
1788        // Test edge cases
1789        let mut empty: Vec<Partial<i32>> = vec![];
1790        assert_eq!(quickselect(&mut empty, 0), None);
1791
1792        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1793        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1794    }
1795
1796    #[test]
1797    fn median_stream() {
1798        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1799        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1800    }
1801
1802    #[test]
1803    fn mad_stream() {
1804        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1805        assert_eq!(
1806            mad(
1807                vec![
1808                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1809                    50, 61, 39, 88, 5, 13, 64
1810                ]
1811                .into_iter(),
1812                None
1813            ),
1814            Some(16.0)
1815        );
1816    }
1817
1818    #[test]
1819    fn mad_stream_precalc_median() {
1820        let data = vec![3usize, 5, 7, 9].into_iter();
1821        let median1 = median(data.clone());
1822        assert_eq!(mad(data, median1), Some(2.0));
1823
1824        let data2 = vec![
1825            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1826            39, 88, 5, 13, 64,
1827        ]
1828        .into_iter();
1829        let median2 = median(data2.clone());
1830        assert_eq!(mad(data2, median2), Some(16.0));
1831    }
1832
1833    #[test]
1834    fn mode_stream() {
1835        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1836        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1837        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1838        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1839        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1840    }
1841
1842    #[test]
1843    fn median_floats() {
1844        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1845        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1846    }
1847
1848    #[test]
1849    fn mode_floats() {
1850        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1851        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1852        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1853        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1854        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1855    }
1856
1857    #[test]
1858    fn modes_stream() {
1859        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1860        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1861        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1862        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1863        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1864        let vec: Vec<u32> = vec![];
1865        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1866    }
1867
1868    #[test]
1869    fn modes_floats() {
1870        assert_eq!(
1871            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1872            (vec![], 0, 0)
1873        );
1874        assert_eq!(
1875            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1876            (vec![3.0], 1, 4)
1877        );
1878        assert_eq!(
1879            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1880            (vec![3.0, 4.0], 2, 2)
1881        );
1882        assert_eq!(
1883            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1884            (vec![1.0, 3.0], 2, 2)
1885        );
1886    }
1887
1888    #[test]
1889    fn antimodes_stream() {
1890        assert_eq!(
1891            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1892            (vec![3, 5, 7, 9], 4, 1)
1893        );
1894        assert_eq!(
1895            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1896            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1897        );
1898        assert_eq!(
1899            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1900            (vec![1], 1, 1)
1901        );
1902        assert_eq!(
1903            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1904            (vec![3, 4], 2, 2)
1905        );
1906        assert_eq!(
1907            antimodes(
1908                vec![
1909                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1910                    14, 14, 15, 15
1911                ]
1912                .into_iter()
1913            ),
1914            // we only show the first 10 of the 13 antimodes
1915            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1916        );
1917        assert_eq!(
1918            antimodes(
1919                vec![
1920                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1921                    14, 14, 15, 15
1922                ]
1923                .into_iter()
1924            ),
1925            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1926        );
1927        assert_eq!(
1928            antimodes(vec![3usize, 3, 3, 4].into_iter()),
1929            (vec![4], 1, 1)
1930        );
1931        assert_eq!(
1932            antimodes(vec![4usize, 3, 3, 3].into_iter()),
1933            (vec![4], 1, 1)
1934        );
1935        assert_eq!(
1936            antimodes(vec![1usize, 1, 2, 2].into_iter()),
1937            (vec![1, 2], 2, 2)
1938        );
1939        let vec: Vec<u32> = vec![];
1940        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1941    }
1942
1943    #[test]
1944    fn antimodes_floats() {
1945        assert_eq!(
1946            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1947            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1948        );
1949        assert_eq!(
1950            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1951            (vec![], 0, 0)
1952        );
1953        assert_eq!(
1954            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1955            (vec![3.0, 4.0], 2, 2)
1956        );
1957        assert_eq!(
1958            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1959            (vec![2.0], 1, 1)
1960        );
1961    }
1962
1963    #[test]
1964    fn test_custom_percentiles() {
1965        // Test with integers
1966        let mut unsorted: Unsorted<i32> = Unsorted::new();
1967        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
1968
1969        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
1970        assert_eq!(result, vec![3, 6, 9]);
1971
1972        // Test with strings
1973        let mut str_data = Unsorted::new();
1974        str_data.extend(vec!["a", "b", "c", "d", "e"]);
1975        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
1976        assert_eq!(result, vec!["a", "b", "c", "d"]);
1977
1978        // Test with chars
1979        let mut char_data = Unsorted::new();
1980        char_data.extend('a'..='e');
1981        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
1982        assert_eq!(result, vec!['b', 'c', 'd']);
1983
1984        // Test with floats
1985        let mut float_data = Unsorted::new();
1986        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
1987        let result = float_data
1988            .custom_percentiles(&[10, 30, 50, 70, 90])
1989            .unwrap();
1990        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
1991
1992        // Test with empty percentiles array
1993        let result = float_data.custom_percentiles(&[]).unwrap();
1994        assert_eq!(result, Vec::<f64>::new());
1995
1996        // Test with duplicate percentiles
1997        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
1998        assert_eq!(result, vec![5.5]);
1999
2000        // Test with extreme percentiles
2001        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2002        assert_eq!(result, vec![1.1, 9.9]);
2003
2004        // Test with unsorted percentiles
2005        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2006        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2007
2008        // Test with single element
2009        let mut single = Unsorted::new();
2010        single.add(42);
2011        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2012        assert_eq!(result, vec![42, 42, 42]);
2013    }
2014
2015    #[test]
2016    fn quartiles_stream() {
2017        assert_eq!(
2018            quartiles(vec![3usize, 5, 7].into_iter()),
2019            Some((3., 5., 7.))
2020        );
2021        assert_eq!(
2022            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2023            Some((4., 6., 8.))
2024        );
2025        assert_eq!(
2026            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2027            Some((1.5, 4.5, 9.))
2028        );
2029        assert_eq!(
2030            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2031            Some((4., 7., 10.5))
2032        );
2033        assert_eq!(
2034            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2035            Some((2., 3., 9.))
2036        );
2037        assert_eq!(
2038            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2039            Some((5., 8., 12.))
2040        );
2041        assert_eq!(
2042            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2043            Some((2., 6., 10.))
2044        );
2045        assert_eq!(
2046            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2047            Some((5., 9., 20.))
2048        );
2049        assert_eq!(
2050            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2051            Some((5., 6., 10.))
2052        );
2053    }
2054
2055    #[test]
2056    fn quartiles_floats() {
2057        assert_eq!(
2058            quartiles(vec![3_f64, 5., 7.].into_iter()),
2059            Some((3., 5., 7.))
2060        );
2061        assert_eq!(
2062            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2063            Some((4., 6., 8.))
2064        );
2065        assert_eq!(
2066            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2067            Some((4., 7., 10.5))
2068        );
2069        assert_eq!(
2070            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2071            Some((5., 8., 12.))
2072        );
2073        assert_eq!(
2074            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2075            Some((5., 9., 20.))
2076        );
2077    }
2078
2079    #[test]
2080    fn test_quartiles_zero_copy_small() {
2081        // Test edge cases for zero-copy quartiles
2082        let unsorted: Unsorted<i32> = Unsorted::new();
2083        assert_eq!(unsorted.quartiles_zero_copy(), None);
2084
2085        let mut unsorted = Unsorted::new();
2086        unsorted.extend(vec![1, 2]);
2087        assert_eq!(unsorted.quartiles_zero_copy(), None);
2088
2089        let mut unsorted = Unsorted::new();
2090        unsorted.extend(vec![1, 2, 3]);
2091        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2092
2093        // Test larger case
2094        let mut unsorted = Unsorted::new();
2095        unsorted.extend(vec![3, 5, 7, 9]);
2096        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2097    }
2098
2099    #[test]
2100    fn gini_empty() {
2101        let mut unsorted: Unsorted<i32> = Unsorted::new();
2102        assert_eq!(unsorted.gini(None), None);
2103        let empty_vec: Vec<i32> = vec![];
2104        assert_eq!(gini(empty_vec.into_iter(), None), None);
2105    }
2106
2107    #[test]
2108    fn gini_single_element() {
2109        let mut unsorted = Unsorted::new();
2110        unsorted.add(5);
2111        assert_eq!(unsorted.gini(None), Some(0.0));
2112        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2113    }
2114
2115    #[test]
2116    fn gini_perfect_equality() {
2117        // All values are the same - perfect equality, Gini = 0
2118        let mut unsorted = Unsorted::new();
2119        unsorted.extend(vec![10, 10, 10, 10, 10]);
2120        let result = unsorted.gini(None).unwrap();
2121        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2122
2123        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2124    }
2125
2126    #[test]
2127    fn gini_perfect_inequality() {
2128        // One value has everything, others have zero - perfect inequality
2129        // For [0, 0, 0, 0, 100], Gini should be close to 1
2130        let mut unsorted = Unsorted::new();
2131        unsorted.extend(vec![0, 0, 0, 0, 100]);
2132        let result = unsorted.gini(None).unwrap();
2133        // Perfect inequality should give Gini close to 1
2134        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2135        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2136    }
2137
2138    #[test]
2139    fn gini_stream() {
2140        // Test with known values
2141        // For [1, 2, 3, 4, 5]:
2142        // sum = 15
2143        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2144        // n = 5
2145        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2146        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2147        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2148        assert!(
2149            (result - expected).abs() < 1e-10,
2150            "Expected {}, got {}",
2151            expected,
2152            result
2153        );
2154    }
2155
2156    #[test]
2157    fn gini_floats() {
2158        let mut unsorted = Unsorted::new();
2159        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2160        let result = unsorted.gini(None).unwrap();
2161        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2162        assert!((result - expected).abs() < 1e-10);
2163
2164        assert!(
2165            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2166                < 1e-10
2167        );
2168    }
2169
2170    #[test]
2171    fn gini_all_zeros() {
2172        // All zeros - sum is zero, Gini is undefined
2173        let mut unsorted = Unsorted::new();
2174        unsorted.extend(vec![0, 0, 0, 0]);
2175        assert_eq!(unsorted.gini(None), None);
2176        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2177    }
2178
2179    #[test]
2180    fn gini_negative_values() {
2181        // Test with negative values (mathematically valid)
2182        let mut unsorted = Unsorted::new();
2183        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2184        let result = unsorted.gini(None);
2185        // Sum is 0, so Gini is undefined
2186        assert_eq!(result, None);
2187
2188        // Test with negative values that don't sum to zero
2189        let mut unsorted = Unsorted::new();
2190        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2191        let result = unsorted.gini(None);
2192        // Sum is 0, so Gini is undefined
2193        assert_eq!(result, None);
2194
2195        // Test with values containing negatives that sum to non-zero
2196        // Gini is undefined for negative values, should return None
2197        let mut unsorted = Unsorted::new();
2198        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2199        let result = unsorted.gini(None);
2200        assert_eq!(result, None);
2201    }
2202
2203    #[test]
2204    fn gini_known_cases() {
2205        // Test case: [1, 1, 1, 1, 1] - perfect equality
2206        let mut unsorted = Unsorted::new();
2207        unsorted.extend(vec![1, 1, 1, 1, 1]);
2208        let result = unsorted.gini(None).unwrap();
2209        assert!((result - 0.0).abs() < 1e-10);
2210
2211        // Test case: [0, 0, 0, 0, 1] - high inequality
2212        let mut unsorted = Unsorted::new();
2213        unsorted.extend(vec![0, 0, 0, 0, 1]);
2214        let result = unsorted.gini(None).unwrap();
2215        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2216        assert!((result - 0.8).abs() < 1e-10);
2217
2218        // Test case: [1, 2, 3] - moderate inequality
2219        let mut unsorted = Unsorted::new();
2220        unsorted.extend(vec![1, 2, 3]);
2221        let result = unsorted.gini(None).unwrap();
2222        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2223        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2224        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2225        assert!((result - expected).abs() < 1e-10);
2226    }
2227
2228    #[test]
2229    fn gini_precalc_sum() {
2230        // Test with pre-calculated sum
2231        let mut unsorted = Unsorted::new();
2232        unsorted.extend(vec![1, 2, 3, 4, 5]);
2233        let precalc_sum = Some(15.0);
2234        let result = unsorted.gini(precalc_sum).unwrap();
2235        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2236        assert!((result - expected).abs() < 1e-10);
2237
2238        // Test that pre-calculated sum gives same result
2239        let mut unsorted2 = Unsorted::new();
2240        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2241        let result2 = unsorted2.gini(None).unwrap();
2242        assert!((result - result2).abs() < 1e-10);
2243    }
2244
2245    #[test]
2246    fn gini_large_dataset() {
2247        // Test with larger dataset to exercise parallel path
2248        let data: Vec<i32> = (1..=1000).collect();
2249        let result = gini(data.iter().copied(), None);
2250        assert!(result.is_some());
2251        let gini_val = result.unwrap();
2252        // For uniform distribution, Gini should be positive but not too high
2253        assert!(gini_val > 0.0 && gini_val < 0.5);
2254    }
2255
2256    #[test]
2257    fn gini_unsorted_vs_sorted() {
2258        // Test that sorting doesn't affect result
2259        let mut unsorted1 = Unsorted::new();
2260        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2261        let result1 = unsorted1.gini(None).unwrap();
2262
2263        let mut unsorted2 = Unsorted::new();
2264        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2265        let result2 = unsorted2.gini(None).unwrap();
2266
2267        assert!((result1 - result2).abs() < 1e-10);
2268    }
2269
2270    #[test]
2271    fn gini_small_values() {
2272        // Test with very small values
2273        let mut unsorted = Unsorted::new();
2274        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2275        let result = unsorted.gini(None);
2276        assert!(result.is_some());
2277        // Should be same as [1, 2, 3, 4, 5] scaled down
2278        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2279        assert!((result.unwrap() - expected).abs() < 1e-10);
2280    }
2281
2282    #[test]
2283    fn gini_large_values() {
2284        // Test with large values
2285        let mut unsorted = Unsorted::new();
2286        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2287        let result = unsorted.gini(None);
2288        assert!(result.is_some());
2289        // Should be same as [1, 2, 3, 4, 5] scaled up
2290        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2291        assert!((result.unwrap() - expected).abs() < 1e-10);
2292    }
2293
2294    #[test]
2295    fn gini_two_elements() {
2296        // Test with exactly 2 elements
2297        let mut unsorted = Unsorted::new();
2298        unsorted.extend(vec![1, 2]);
2299        let result = unsorted.gini(None).unwrap();
2300        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2301        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2302        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2303        assert!((result - expected).abs() < 1e-10);
2304    }
2305
2306    #[test]
2307    fn gini_precalc_sum_zero() {
2308        // Test with pre-calculated sum of zero (should return None)
2309        let mut unsorted = Unsorted::new();
2310        unsorted.extend(vec![1, 2, 3, 4, 5]);
2311        let result = unsorted.gini(Some(0.0));
2312        assert_eq!(result, None);
2313    }
2314
2315    #[test]
2316    fn gini_precalc_sum_negative() {
2317        // Gini is undefined for negative values, should return None
2318        let mut unsorted = Unsorted::new();
2319        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2320        let result = unsorted.gini(None);
2321        assert_eq!(result, None);
2322
2323        // Negative precalculated sum should also return None
2324        let mut unsorted = Unsorted::new();
2325        unsorted.extend(vec![1, 2, 3]);
2326        let result = unsorted.gini(Some(-5.0));
2327        assert_eq!(result, None);
2328    }
2329
2330    #[test]
2331    fn gini_different_types() {
2332        // Test with different integer types
2333        let mut unsorted_u32 = Unsorted::new();
2334        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2335        let result_u32 = unsorted_u32.gini(None).unwrap();
2336
2337        let mut unsorted_i64 = Unsorted::new();
2338        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2339        let result_i64 = unsorted_i64.gini(None).unwrap();
2340
2341        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2342        assert!((result_u32 - expected).abs() < 1e-10);
2343        assert!((result_i64 - expected).abs() < 1e-10);
2344    }
2345
2346    #[test]
2347    fn gini_extreme_inequality() {
2348        // Test with extreme inequality: one very large value, many zeros
2349        let mut unsorted = Unsorted::new();
2350        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2351        let result = unsorted.gini(None).unwrap();
2352        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2353        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2354        assert!((result - 0.9).abs() < 1e-10);
2355    }
2356
2357    #[test]
2358    fn gini_duplicate_values() {
2359        // Test with many duplicate values
2360        let mut unsorted = Unsorted::new();
2361        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2362        let result = unsorted.gini(None);
2363        assert!(result.is_some());
2364        // Should be between 0 and 1
2365        let gini_val = result.unwrap();
2366        assert!(gini_val >= 0.0 && gini_val <= 1.0);
2367    }
2368
2369    #[test]
2370    fn kurtosis_empty() {
2371        let mut unsorted: Unsorted<i32> = Unsorted::new();
2372        assert_eq!(unsorted.kurtosis(None, None), None);
2373        let empty_vec: Vec<i32> = vec![];
2374        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2375    }
2376
2377    #[test]
2378    fn kurtosis_small() {
2379        // Need at least 4 elements
2380        let mut unsorted = Unsorted::new();
2381        unsorted.extend(vec![1, 2]);
2382        assert_eq!(unsorted.kurtosis(None, None), None);
2383
2384        let mut unsorted = Unsorted::new();
2385        unsorted.extend(vec![1, 2, 3]);
2386        assert_eq!(unsorted.kurtosis(None, None), None);
2387    }
2388
2389    #[test]
2390    fn kurtosis_normal_distribution() {
2391        // Normal distribution should have kurtosis close to 0
2392        let mut unsorted = Unsorted::new();
2393        unsorted.extend(vec![1, 2, 3, 4, 5]);
2394        let result = unsorted.kurtosis(None, None);
2395        assert!(result.is_some());
2396        // For small samples, kurtosis can vary significantly
2397    }
2398
2399    #[test]
2400    fn kurtosis_all_same() {
2401        // All same values - variance is 0, kurtosis undefined
2402        let mut unsorted = Unsorted::new();
2403        unsorted.extend(vec![5, 5, 5, 5]);
2404        assert_eq!(unsorted.kurtosis(None, None), None);
2405    }
2406
2407    #[test]
2408    fn kurtosis_stream() {
2409        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2410        assert!(result.is_some());
2411    }
2412
2413    #[test]
2414    fn kurtosis_precalc_mean_variance() {
2415        // Test with pre-calculated mean and variance
2416        let mut unsorted = Unsorted::new();
2417        unsorted.extend(vec![1, 2, 3, 4, 5]);
2418
2419        // Calculate mean and variance manually
2420        let mean = 3.0f64;
2421        let variance = ((1.0f64 - 3.0).powi(2)
2422            + (2.0f64 - 3.0).powi(2)
2423            + (3.0f64 - 3.0).powi(2)
2424            + (4.0f64 - 3.0).powi(2)
2425            + (5.0f64 - 3.0).powi(2))
2426            / 5.0;
2427
2428        let result = unsorted.kurtosis(Some(mean), Some(variance));
2429        assert!(result.is_some());
2430
2431        // Test that pre-calculated values give same result
2432        let mut unsorted2 = Unsorted::new();
2433        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2434        let result2 = unsorted2.kurtosis(None, None);
2435        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2436    }
2437
2438    #[test]
2439    fn kurtosis_precalc_mean_only() {
2440        // Test with pre-calculated mean only
2441        let mut unsorted = Unsorted::new();
2442        unsorted.extend(vec![1, 2, 3, 4, 5]);
2443        let mean = 3.0f64;
2444
2445        let result = unsorted.kurtosis(Some(mean), None);
2446        assert!(result.is_some());
2447
2448        // Test that pre-calculated mean gives same result
2449        let mut unsorted2 = Unsorted::new();
2450        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2451        let result2 = unsorted2.kurtosis(None, None);
2452        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2453    }
2454
2455    #[test]
2456    fn kurtosis_precalc_variance_only() {
2457        // Test with pre-calculated variance only
2458        let mut unsorted = Unsorted::new();
2459        unsorted.extend(vec![1, 2, 3, 4, 5]);
2460        let variance = ((1.0f64 - 3.0).powi(2)
2461            + (2.0f64 - 3.0).powi(2)
2462            + (3.0f64 - 3.0).powi(2)
2463            + (4.0f64 - 3.0).powi(2)
2464            + (5.0f64 - 3.0).powi(2))
2465            / 5.0;
2466
2467        let result = unsorted.kurtosis(None, Some(variance));
2468        assert!(result.is_some());
2469
2470        // Test that pre-calculated variance gives same result
2471        let mut unsorted2 = Unsorted::new();
2472        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2473        let result2 = unsorted2.kurtosis(None, None);
2474        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2475    }
2476
2477    #[test]
2478    fn kurtosis_exact_calculation() {
2479        // Test with exact calculation for [1, 2, 3, 4]
2480        // Mean = 2.5
2481        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2482        // Variance^2 = 1.5625
2483        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2484        // n = 4
2485        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2486        let mut unsorted = Unsorted::new();
2487        unsorted.extend(vec![1, 2, 3, 4]);
2488        let result = unsorted.kurtosis(None, None).unwrap();
2489        // For small samples, kurtosis can be very high
2490        assert!(result.is_finite());
2491    }
2492
2493    #[test]
2494    fn kurtosis_uniform_distribution() {
2495        // Uniform distribution should have negative excess kurtosis
2496        let mut unsorted = Unsorted::new();
2497        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2498        let result = unsorted.kurtosis(None, None).unwrap();
2499        // Uniform distribution has excess kurtosis = -1.2
2500        // But for small samples, it can vary significantly
2501        assert!(result.is_finite());
2502    }
2503
2504    #[test]
2505    fn kurtosis_large_dataset() {
2506        // Test with larger dataset to exercise parallel path
2507        let data: Vec<i32> = (1..=1000).collect();
2508        let result = kurtosis(data.iter().copied(), None, None);
2509        assert!(result.is_some());
2510        let kurt_val = result.unwrap();
2511        assert!(kurt_val.is_finite());
2512    }
2513
2514    #[test]
2515    fn kurtosis_unsorted_vs_sorted() {
2516        // Test that sorting doesn't affect result
2517        let mut unsorted1 = Unsorted::new();
2518        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2519        let result1 = unsorted1.kurtosis(None, None).unwrap();
2520
2521        let mut unsorted2 = Unsorted::new();
2522        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2523        let result2 = unsorted2.kurtosis(None, None).unwrap();
2524
2525        assert!((result1 - result2).abs() < 1e-10);
2526    }
2527
2528    #[test]
2529    fn kurtosis_minimum_size() {
2530        // Test with exactly 4 elements (minimum required)
2531        let mut unsorted = Unsorted::new();
2532        unsorted.extend(vec![1, 2, 3, 4]);
2533        let result = unsorted.kurtosis(None, None);
2534        assert!(result.is_some());
2535        assert!(result.unwrap().is_finite());
2536    }
2537
2538    #[test]
2539    fn kurtosis_heavy_tailed() {
2540        // Test with heavy-tailed distribution (outliers)
2541        let mut unsorted = Unsorted::new();
2542        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2543        let result = unsorted.kurtosis(None, None).unwrap();
2544        // Heavy tails should give positive excess kurtosis
2545        assert!(result.is_finite());
2546        // With an outlier, kurtosis should be positive
2547        assert!(result > -10.0); // Allow some variance but should be reasonable
2548    }
2549
2550    #[test]
2551    fn kurtosis_light_tailed() {
2552        // Test with light-tailed distribution (values close together)
2553        let mut unsorted = Unsorted::new();
2554        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2555        let result = unsorted.kurtosis(None, None).unwrap();
2556        // Light tails might give negative excess kurtosis
2557        assert!(result.is_finite());
2558    }
2559
2560    #[test]
2561    fn kurtosis_small_variance() {
2562        // Test with very small variance (values very close together)
2563        let mut unsorted = Unsorted::new();
2564        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2565        let result = unsorted.kurtosis(None, None);
2566        // Should still compute (variance is very small but non-zero)
2567        assert!(result.is_some());
2568        assert!(result.unwrap().is_finite());
2569    }
2570
2571    #[test]
2572    fn kurtosis_precalc_zero_variance() {
2573        // Test with pre-calculated variance of zero (should return None)
2574        let mut unsorted = Unsorted::new();
2575        unsorted.extend(vec![1, 2, 3, 4, 5]);
2576        let result = unsorted.kurtosis(None, Some(0.0));
2577        assert_eq!(result, None);
2578    }
2579
2580    #[test]
2581    fn kurtosis_precalc_negative_variance() {
2582        // Test with negative variance (invalid, but should handle gracefully)
2583        let mut unsorted = Unsorted::new();
2584        unsorted.extend(vec![1, 2, 3, 4, 5]);
2585        // Negative variance is invalid, but function should handle it
2586        let result = unsorted.kurtosis(None, Some(-1.0));
2587        // Should either return None or handle it gracefully
2588        // The function computes variance_sq = variance^2, so negative becomes positive
2589        // But this is invalid input, so behavior may vary
2590        // For now, just check it doesn't panic
2591        let _ = result;
2592    }
2593
2594    #[test]
2595    fn kurtosis_different_types() {
2596        // Test with different integer types
2597        let mut unsorted_u32 = Unsorted::new();
2598        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2599        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2600
2601        let mut unsorted_i64 = Unsorted::new();
2602        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2603        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2604
2605        assert!((result_u32 - result_i64).abs() < 1e-10);
2606    }
2607
2608    #[test]
2609    fn kurtosis_floating_point_precision() {
2610        // Test floating point precision
2611        let mut unsorted = Unsorted::new();
2612        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2613        let result = unsorted.kurtosis(None, None);
2614        assert!(result.is_some());
2615        assert!(result.unwrap().is_finite());
2616    }
2617
2618    #[test]
2619    fn kurtosis_negative_values() {
2620        // Test with negative values
2621        let mut unsorted = Unsorted::new();
2622        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2623        let result = unsorted.kurtosis(None, None);
2624        assert!(result.is_some());
2625        assert!(result.unwrap().is_finite());
2626    }
2627
2628    #[test]
2629    fn kurtosis_mixed_positive_negative() {
2630        // Test with mixed positive and negative values
2631        let mut unsorted = Unsorted::new();
2632        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2633        let result = unsorted.kurtosis(None, None);
2634        assert!(result.is_some());
2635        assert!(result.unwrap().is_finite());
2636    }
2637
2638    #[test]
2639    fn kurtosis_duplicate_values() {
2640        // Test with duplicate values (but not all same)
2641        let mut unsorted = Unsorted::new();
2642        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2643        let result = unsorted.kurtosis(None, None);
2644        assert!(result.is_some());
2645        assert!(result.unwrap().is_finite());
2646    }
2647
2648    #[test]
2649    fn kurtosis_precalc_mean_wrong() {
2650        // Test that wrong pre-calculated mean gives wrong result
2651        let mut unsorted1 = Unsorted::new();
2652        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2653        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2654
2655        let mut unsorted2 = Unsorted::new();
2656        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2657        let wrong_mean = 10.0; // Wrong mean
2658        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2659
2660        // Results should be different
2661        assert!((correct_result - wrong_result).abs() > 1e-5);
2662    }
2663
2664    #[test]
2665    fn percentile_rank_empty() {
2666        let mut unsorted: Unsorted<i32> = Unsorted::new();
2667        assert_eq!(unsorted.percentile_rank(5), None);
2668        let empty_vec: Vec<i32> = vec![];
2669        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2670    }
2671
2672    #[test]
2673    fn percentile_rank_basic() {
2674        let mut unsorted = Unsorted::new();
2675        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2676
2677        // Value less than all
2678        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2679
2680        // Value greater than all
2681        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2682
2683        // Median (5) should be around 50th percentile
2684        let rank = unsorted.percentile_rank(5).unwrap();
2685        assert!((rank - 50.0).abs() < 1.0);
2686
2687        // First value should be at 10th percentile
2688        let rank = unsorted.percentile_rank(1).unwrap();
2689        assert!((rank - 10.0).abs() < 1.0);
2690    }
2691
2692    #[test]
2693    fn percentile_rank_duplicates() {
2694        let mut unsorted = Unsorted::new();
2695        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2696
2697        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2698        let rank = unsorted.percentile_rank(2).unwrap();
2699        assert!((rank - 40.0).abs() < 1.0);
2700    }
2701
2702    #[test]
2703    fn percentile_rank_stream() {
2704        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2705        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2706    }
2707
2708    #[test]
2709    fn percentile_rank_many_ties() {
2710        // 100 copies of 5 followed by 100 copies of 10 — tests O(log n) upper bound
2711        let mut unsorted = Unsorted::new();
2712        for _ in 0..100 {
2713            unsorted.add(5u32);
2714        }
2715        for _ in 0..100 {
2716            unsorted.add(10u32);
2717        }
2718        // 100 values <= 5 out of 200
2719        let rank = unsorted.percentile_rank(5).unwrap();
2720        assert!((rank - 50.0).abs() < f64::EPSILON);
2721        // All 200 values <= 10
2722        let mut unsorted2 = Unsorted::new();
2723        for _ in 0..100 {
2724            unsorted2.add(5u32);
2725        }
2726        for _ in 0..100 {
2727            unsorted2.add(10u32);
2728        }
2729        let rank = unsorted2.percentile_rank(10).unwrap();
2730        assert!((rank - 100.0).abs() < f64::EPSILON);
2731    }
2732
2733    #[test]
2734    fn atkinson_empty() {
2735        let mut unsorted: Unsorted<i32> = Unsorted::new();
2736        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2737        let empty_vec: Vec<i32> = vec![];
2738        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2739    }
2740
2741    #[test]
2742    fn atkinson_single_element() {
2743        let mut unsorted = Unsorted::new();
2744        unsorted.add(5);
2745        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2746        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2747    }
2748
2749    #[test]
2750    fn atkinson_perfect_equality() {
2751        // All values the same - perfect equality, Atkinson = 0
2752        let mut unsorted = Unsorted::new();
2753        unsorted.extend(vec![10, 10, 10, 10, 10]);
2754        let result = unsorted.atkinson(1.0, None, None).unwrap();
2755        assert!((result - 0.0).abs() < 1e-10);
2756    }
2757
2758    #[test]
2759    fn atkinson_epsilon_zero() {
2760        // Epsilon = 0 means no inequality aversion, should return 0
2761        let mut unsorted = Unsorted::new();
2762        unsorted.extend(vec![1, 2, 3, 4, 5]);
2763        let result = unsorted.atkinson(0.0, None, None).unwrap();
2764        assert!((result - 0.0).abs() < 1e-10);
2765    }
2766
2767    #[test]
2768    fn atkinson_epsilon_one() {
2769        // Epsilon = 1 uses geometric mean
2770        let mut unsorted = Unsorted::new();
2771        unsorted.extend(vec![1, 2, 3, 4, 5]);
2772        let result = unsorted.atkinson(1.0, None, None);
2773        assert!(result.is_some());
2774    }
2775
2776    #[test]
2777    fn atkinson_negative_epsilon() {
2778        let mut unsorted = Unsorted::new();
2779        unsorted.extend(vec![1, 2, 3, 4, 5]);
2780        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2781    }
2782
2783    #[test]
2784    fn atkinson_zero_mean() {
2785        // If mean is zero, Atkinson is undefined
2786        let mut unsorted = Unsorted::new();
2787        unsorted.extend(vec![0, 0, 0, 0]);
2788        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2789    }
2790
2791    #[test]
2792    fn atkinson_stream() {
2793        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2794        assert!(result.is_some());
2795    }
2796
2797    #[test]
2798    fn atkinson_precalc_mean_geometric_sum() {
2799        // Test with pre-calculated mean and geometric_sum
2800        let mut unsorted = Unsorted::new();
2801        unsorted.extend(vec![1, 2, 3, 4, 5]);
2802
2803        // Calculate mean and geometric_sum manually
2804        let mean = 3.0f64;
2805        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2806
2807        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2808        assert!(result.is_some());
2809
2810        // Test that pre-calculated values give same result
2811        let mut unsorted2 = Unsorted::new();
2812        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2813        let result2 = unsorted2.atkinson(1.0, None, None);
2814        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2815    }
2816
2817    #[test]
2818    fn atkinson_precalc_mean_only() {
2819        // Test with pre-calculated mean only
2820        let mut unsorted = Unsorted::new();
2821        unsorted.extend(vec![1, 2, 3, 4, 5]);
2822        let mean = 3.0f64;
2823
2824        let result = unsorted.atkinson(1.0, Some(mean), None);
2825        assert!(result.is_some());
2826
2827        // Test that pre-calculated mean gives same result
2828        let mut unsorted2 = Unsorted::new();
2829        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2830        let result2 = unsorted2.atkinson(1.0, None, None);
2831        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2832    }
2833
2834    #[test]
2835    fn atkinson_precalc_geometric_sum_only() {
2836        // Test with pre-calculated geometric_sum only
2837        let mut unsorted = Unsorted::new();
2838        unsorted.extend(vec![1, 2, 3, 4, 5]);
2839        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2840
2841        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2842        assert!(result.is_some());
2843
2844        // Test that pre-calculated geometric_sum gives same result
2845        let mut unsorted2 = Unsorted::new();
2846        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2847        let result2 = unsorted2.atkinson(1.0, None, None);
2848        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2849    }
2850}
2851
2852#[cfg(test)]
2853mod bench {
2854    use super::*;
2855    use std::time::Instant;
2856
2857    #[test]
2858    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
2859    fn comprehensive_quartiles_benchmark() {
2860        // Test a wide range of data sizes
2861        let data_sizes = vec![
2862            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2863        ];
2864
2865        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2866
2867        for size in data_sizes {
2868            println!("--- Testing with {} elements ---", size);
2869
2870            // Test different data patterns
2871            let test_patterns = vec![
2872                ("Random", generate_random_data(size)),
2873                ("Reverse Sorted", {
2874                    let mut v = Vec::with_capacity(size);
2875                    for x in (0..size).rev() {
2876                        v.push(x as i32);
2877                    }
2878                    v
2879                }),
2880                ("Already Sorted", {
2881                    let mut v = Vec::with_capacity(size);
2882                    for x in 0..size {
2883                        v.push(x as i32);
2884                    }
2885                    v
2886                }),
2887                ("Many Duplicates", {
2888                    // Create a vector with just a few distinct values repeated many times
2889                    let mut v = Vec::with_capacity(size);
2890                    let chunk_size = size / 100;
2891                    for i in 0..100 {
2892                        v.extend(std::iter::repeat_n(i, chunk_size));
2893                    }
2894                    // Add any remaining elements
2895                    v.extend(std::iter::repeat_n(0, size - v.len()));
2896                    v
2897                }),
2898            ];
2899
2900            for (pattern_name, test_data) in test_patterns {
2901                println!("\n  Pattern: {}", pattern_name);
2902
2903                // Benchmark sorting-based approach
2904                let mut unsorted1 = Unsorted::new();
2905                unsorted1.extend(test_data.clone());
2906
2907                let start = Instant::now();
2908                let result_sorted = unsorted1.quartiles();
2909                let sorted_time = start.elapsed();
2910
2911                // Benchmark selection-based approach (with copying)
2912                let mut unsorted2 = Unsorted::new();
2913                unsorted2.extend(test_data.clone());
2914
2915                let start = Instant::now();
2916                let result_selection = unsorted2.quartiles_with_selection();
2917                let selection_time = start.elapsed();
2918
2919                // Benchmark zero-copy selection-based approach
2920                let mut unsorted3 = Unsorted::new();
2921                unsorted3.extend(test_data);
2922
2923                let start = Instant::now();
2924                let result_zero_copy = unsorted3.quartiles_zero_copy();
2925                let zero_copy_time = start.elapsed();
2926
2927                // Verify results are the same
2928                assert_eq!(result_sorted, result_selection);
2929                assert_eq!(result_sorted, result_zero_copy);
2930
2931                let selection_speedup =
2932                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
2933                let zero_copy_speedup =
2934                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
2935
2936                println!("    Sorting:       {:>12?}", sorted_time);
2937                println!(
2938                    "    Selection:     {:>12?} (speedup: {:.2}x)",
2939                    selection_time, selection_speedup
2940                );
2941                println!(
2942                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
2943                    zero_copy_time, zero_copy_speedup
2944                );
2945
2946                let best_algorithm =
2947                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
2948                        "ZERO-COPY"
2949                    } else if selection_speedup > 1.0 {
2950                        "SELECTION"
2951                    } else {
2952                        "SORTING"
2953                    };
2954                println!("    Best: {}", best_algorithm);
2955            }
2956
2957            println!(); // Add blank line between sizes
2958        }
2959    }
2960
2961    // Generate random data for benchmarking
2962    fn generate_random_data(size: usize) -> Vec<i32> {
2963        // Simple LCG random number generator for reproducible results
2964        let mut rng = 1234567u64;
2965        let mut vec = Vec::with_capacity(size);
2966        for _ in 0..size {
2967            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
2968            vec.push((rng >> 16) as i32);
2969        }
2970        vec
2971    }
2972
2973    #[test]
2974    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
2975    fn find_selection_threshold() {
2976        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
2977
2978        // Binary search approach to find the threshold
2979        let mut found_threshold = None;
2980        let test_sizes = vec![
2981            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
2982            15_000_000, 20_000_000, 25_000_000, 30_000_000,
2983        ];
2984
2985        for size in test_sizes {
2986            println!("Testing size: {}", size);
2987
2988            // Use random data as it's most representative of real-world scenarios
2989            let test_data = generate_random_data(size);
2990
2991            // Run multiple iterations to get average performance
2992            let iterations = 3;
2993            let mut sorting_total = 0u128;
2994            let mut selection_total = 0u128;
2995            let mut zero_copy_total = 0u128;
2996
2997            for i in 0..iterations {
2998                println!("  Iteration {}/{}", i + 1, iterations);
2999
3000                // Sorting approach
3001                let mut unsorted1 = Unsorted::new();
3002                unsorted1.extend(test_data.clone());
3003
3004                let start = Instant::now();
3005                let _result_sorted = unsorted1.quartiles();
3006                sorting_total += start.elapsed().as_nanos();
3007
3008                // Selection approach (with copying)
3009                let mut unsorted2 = Unsorted::new();
3010                unsorted2.extend(test_data.clone());
3011
3012                let start = Instant::now();
3013                let _result_selection = unsorted2.quartiles_with_selection();
3014                selection_total += start.elapsed().as_nanos();
3015
3016                // Zero-copy selection approach
3017                let mut unsorted3 = Unsorted::new();
3018                unsorted3.extend(test_data.clone());
3019
3020                let start = Instant::now();
3021                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3022                zero_copy_total += start.elapsed().as_nanos();
3023            }
3024
3025            let avg_sorting = sorting_total / iterations as u128;
3026            let avg_selection = selection_total / iterations as u128;
3027            let avg_zero_copy = zero_copy_total / iterations as u128;
3028            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3029            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3030
3031            println!(
3032                "  Average sorting:    {:>12.2}ms",
3033                avg_sorting as f64 / 1_000_000.0
3034            );
3035            println!(
3036                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3037                avg_selection as f64 / 1_000_000.0,
3038                selection_speedup
3039            );
3040            println!(
3041                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3042                avg_zero_copy as f64 / 1_000_000.0,
3043                zero_copy_speedup
3044            );
3045
3046            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3047                found_threshold = Some(size);
3048                let best_method = if zero_copy_speedup > selection_speedup {
3049                    "Zero-copy"
3050                } else {
3051                    "Selection"
3052                };
3053                println!(
3054                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3055                    best_method, size
3056                );
3057            }
3058
3059            println!();
3060        }
3061
3062        match found_threshold {
3063            Some(threshold) => println!(
3064                "🎯 Selection algorithm becomes faster at approximately {} elements",
3065                threshold
3066            ),
3067            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3068        }
3069    }
3070
3071    #[test]
3072    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3073    fn benchmark_different_data_types() {
3074        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3075
3076        let size = 5_000_000; // Use a large size where differences might be visible
3077
3078        // Test with f64 (floating point)
3079        println!("Testing with f64 data:");
3080        let float_data: Vec<f64> = generate_random_data(size)
3081            .into_iter()
3082            .map(|x| x as f64 / 1000.0)
3083            .collect();
3084
3085        let mut unsorted1 = Unsorted::new();
3086        unsorted1.extend(float_data.clone());
3087        let start = Instant::now();
3088        let _result = unsorted1.quartiles();
3089        let sorting_time = start.elapsed();
3090
3091        let mut unsorted2 = Unsorted::new();
3092        unsorted2.extend(float_data.clone());
3093        let start = Instant::now();
3094        let _result = unsorted2.quartiles_with_selection();
3095        let selection_time = start.elapsed();
3096
3097        let mut unsorted3 = Unsorted::new();
3098        unsorted3.extend(float_data);
3099        let start = Instant::now();
3100        let _result = unsorted3.quartiles_zero_copy();
3101        let zero_copy_time = start.elapsed();
3102
3103        println!("  Sorting:    {:?}", sorting_time);
3104        println!("  Selection:  {:?}", selection_time);
3105        println!("  Zero-copy:  {:?}", zero_copy_time);
3106        println!(
3107            "  Selection Speedup:  {:.2}x",
3108            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3109        );
3110        println!(
3111            "  Zero-copy Speedup:  {:.2}x\n",
3112            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3113        );
3114
3115        // Test with i64 (larger integers)
3116        println!("Testing with i64 data:");
3117        let int64_data: Vec<i64> = generate_random_data(size)
3118            .into_iter()
3119            .map(|x| x as i64 * 1000)
3120            .collect();
3121
3122        let mut unsorted1 = Unsorted::new();
3123        unsorted1.extend(int64_data.clone());
3124        let start = Instant::now();
3125        let _result = unsorted1.quartiles();
3126        let sorting_time = start.elapsed();
3127
3128        let mut unsorted2 = Unsorted::new();
3129        unsorted2.extend(int64_data.clone());
3130        let start = Instant::now();
3131        let _result = unsorted2.quartiles_with_selection();
3132        let selection_time = start.elapsed();
3133
3134        let mut unsorted3 = Unsorted::new();
3135        unsorted3.extend(int64_data);
3136        let start = Instant::now();
3137        let _result = unsorted3.quartiles_zero_copy();
3138        let zero_copy_time = start.elapsed();
3139
3140        println!("  Sorting:    {:?}", sorting_time);
3141        println!("  Selection:  {:?}", selection_time);
3142        println!("  Zero-copy:  {:?}", zero_copy_time);
3143        println!(
3144            "  Selection Speedup:  {:.2}x",
3145            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3146        );
3147        println!(
3148            "  Zero-copy Speedup:  {:.2}x",
3149            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3150        );
3151    }
3152}
stats/unsorted.rs

stats/
unsorted.rs