stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12/// Compute the exact median on a stream of data.
13///
14/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
15pub fn median<I>(it: I) -> Option<f64>
16where
17    I: Iterator,
18    <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20    it.collect::<Unsorted<_>>().median()
21}
22
23/// Compute the median absolute deviation (MAD) on a stream of data.
24pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26    I: Iterator,
27    <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29    it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
33///
34/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
35pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37    I: Iterator,
38    <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40    it.collect::<Unsorted<_>>().quartiles()
41}
42
43/// Compute the exact mode on a stream of data.
44///
45/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
46///
47/// If the data does not have a mode, then `None` is returned.
48pub fn mode<T, I>(it: I) -> Option<T>
49where
50    T: PartialOrd + Clone,
51    I: Iterator<Item = T>,
52{
53    it.collect::<Unsorted<T>>().mode()
54}
55
56/// Compute the modes on a stream of data.
57///
58/// If there is a single mode, then only that value is returned in the `Vec`
59/// however, if there are multiple values tied for occurring the most amount of times
60/// those values are returned.
61///
62/// ## Example
63/// ```
64/// use stats;
65///
66/// let vals = vec![1, 1, 2, 2, 3];
67///
68/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
69/// ```
70/// This has time complexity `O(n)`
71///
72/// If the data does not have a mode, then an empty `Vec` is returned.
73pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75    T: PartialOrd + Clone,
76    I: Iterator<Item = T>,
77{
78    it.collect::<Unsorted<T>>().modes()
79}
80
81/// Compute the antimodes on a stream of data.
82///
83/// Antimode is the least frequent non-zero score.
84///
85/// If there is a single antimode, then only that value is returned in the `Vec`
86/// however, if there are multiple values tied for occurring the least amount of times
87/// those values are returned.
88///
89/// Only the first 10 antimodes are returned to prevent returning the whole set
90/// when cardinality = number of records (i.e. all unique values).
91///
92/// ## Example
93/// ```
94/// use stats;
95///
96/// let vals = vec![1, 1, 2, 2, 3];
97///
98/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
99/// ```
100/// This has time complexity `O(n)`
101///
102/// If the data does not have an antimode, then an empty `Vec` is returned.
103pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105    T: PartialOrd + Clone,
106    I: Iterator<Item = T>,
107{
108    let (antimodes_result, antimodes_count, antimodes_occurrences) =
109        it.collect::<Unsorted<T>>().antimodes();
110    (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113/// Compute the Gini Coefficient on a stream of data.
114///
115/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
116/// to 1 (perfect inequality).
117///
118/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
119pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
120where
121    I: Iterator,
122    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
123{
124    it.collect::<Unsorted<_>>().gini(precalc_sum)
125}
126
127/// Compute the kurtosis (excess kurtosis) on a stream of data.
128///
129/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
130/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
131/// negative values indicate light tails.
132///
133/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
134pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
135where
136    I: Iterator,
137    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
138{
139    it.collect::<Unsorted<_>>()
140        .kurtosis(precalc_mean, precalc_variance)
141}
142
143/// Compute the percentile rank of a value on a stream of data.
144///
145/// Returns the percentile rank (0-100) of the given value in the distribution.
146/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
147///
148/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
149pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
150where
151    I: Iterator,
152    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
153    V: PartialOrd + ToPrimitive,
154{
155    it.collect::<Unsorted<_>>().percentile_rank(value)
156}
157
158/// Compute the Atkinson Index on a stream of data.
159///
160/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
161/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
162/// Higher ε values give more weight to inequality at the lower end of the distribution.
163///
164/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
165pub fn atkinson<I>(
166    it: I,
167    epsilon: f64,
168    precalc_mean: Option<f64>,
169    precalc_geometric_sum: Option<f64>,
170) -> Option<f64>
171where
172    I: Iterator,
173    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
174{
175    it.collect::<Unsorted<_>>()
176        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
177}
178
179fn median_on_sorted<T>(data: &[T]) -> Option<f64>
180where
181    T: PartialOrd + ToPrimitive,
182{
183    Some(match data.len() {
184        // Empty slice case - return None early
185        0 => return None,
186        // Single element case - return that element converted to f64
187        1 => data.first()?.to_f64()?,
188        // Even length case - average the two middle elements
189        len if len.is_multiple_of(2) => {
190            let idx = len / 2;
191            // Safety: we know these indices are valid because we checked len is even and non-zero,
192            // so idx-1 and idx are valid indices into data
193            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
194            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
195            f64::midpoint(v1, v2)
196        }
197        // Odd length case - return the middle element
198        // Safety: we know the index is within bounds
199        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
200    })
201}
202
203fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
204where
205    T: Sync + PartialOrd + ToPrimitive,
206{
207    if data.is_empty() {
208        return None;
209    }
210    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
211
212    // Use adaptive parallel processing based on data size
213    let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
214        // Sequential processing for small datasets
215        let mut vec = Vec::with_capacity(data.len());
216        for x in data {
217            vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
218        }
219        vec
220    } else {
221        // Parallel processing for large datasets
222        data.par_iter()
223            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
224            .collect()
225    };
226
227    // Use adaptive sorting based on size
228    if abs_diff_vec.len() < PARALLEL_THRESHOLD {
229        abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
230    } else {
231        abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
232    }
233    median_on_sorted(&abs_diff_vec)
234}
235
236fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
237where
238    T: Sync + PartialOrd + ToPrimitive,
239{
240    let len = data.len();
241
242    // Early return for empty data
243    if len == 0 {
244        return None;
245    }
246
247    // Single element case: perfect equality, Gini = 0
248    if len == 1 {
249        return Some(0.0);
250    }
251
252    // Use pre-calculated sum if provided, otherwise compute it
253    let sum = if let Some(precalc) = precalc_sum {
254        precalc
255    } else if len < PARALLEL_THRESHOLD {
256        let mut sum = 0.0;
257        for x in data {
258            sum += x.0.to_f64()?;
259        }
260        sum
261    } else {
262        data.par_iter()
263            .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
264            .sum()
265    };
266
267    // Compute weighted sum
268    let weighted_sum = if len < PARALLEL_THRESHOLD {
269        let mut weighted_sum = 0.0;
270        for (i, x) in data.iter().enumerate() {
271            let val = x.0.to_f64()?;
272            weighted_sum += (i + 1) as f64 * val;
273        }
274        weighted_sum
275    } else {
276        // Compute weighted sum in parallel using enumerate to get indices
277        data.par_iter()
278            .enumerate()
279            .map(|(i, x)| {
280                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
281                (i + 1) as f64 * val
282            })
283            .sum()
284    };
285
286    // If sum is zero, Gini is undefined
287    if sum == 0.0 {
288        return None;
289    }
290
291    // Compute Gini coefficient using the formula:
292    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
293    // where i is 1-indexed rank and y_i are sorted values
294    let n = len as f64;
295    let gini = (2.0 * weighted_sum) / (n * sum) - (n + 1.0) / n;
296
297    Some(gini)
298}
299
300fn kurtosis_on_sorted<T>(
301    data: &[Partial<T>],
302    precalc_mean: Option<f64>,
303    precalc_variance: Option<f64>,
304) -> Option<f64>
305where
306    T: Sync + PartialOrd + ToPrimitive,
307{
308    let len = data.len();
309
310    // Need at least 4 elements for meaningful kurtosis
311    if len < 4 {
312        return None;
313    }
314
315    // Use pre-calculated mean if provided, otherwise compute it
316    let mean = if let Some(precalc) = precalc_mean {
317        precalc
318    } else {
319        let sum: f64 = if len < PARALLEL_THRESHOLD {
320            let mut sum = 0.0;
321            for x in data {
322                sum += x.0.to_f64()?;
323            }
324            sum
325        } else {
326            data.par_iter()
327                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
328                .sum()
329        };
330        sum / len as f64
331    };
332
333    // Compute variance_sq and fourth_power_sum
334    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
335    // Otherwise, we need to compute variance from the data
336    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
337        // Use pre-calculated variance: variance_sq = variance^2
338        let variance_sq = variance * variance;
339
340        // Still need to compute fourth_power_sum
341        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
342            let mut sum = 0.0;
343            for x in data {
344                let val = x.0.to_f64()?;
345                let diff = val - mean;
346                let diff_sq = diff * diff;
347                sum += diff_sq * diff_sq;
348            }
349            sum
350        } else {
351            data.par_iter()
352                .map(|x| {
353                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
354                    let diff = val - mean;
355                    let diff_sq = diff * diff;
356                    diff_sq * diff_sq
357                })
358                .sum()
359        };
360
361        (variance_sq, fourth_power_sum)
362    } else {
363        // Compute both variance_sum and fourth_power_sum
364        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
365            let mut variance_sum = 0.0;
366            let mut fourth_power_sum = 0.0;
367
368            for x in data {
369                let val = x.0.to_f64()?;
370                let diff = val - mean;
371                let diff_sq = diff * diff;
372                variance_sum += diff_sq;
373                fourth_power_sum += diff_sq * diff_sq;
374            }
375
376            (variance_sum, fourth_power_sum)
377        } else {
378            let variance_sum: f64 = data
379                .par_iter()
380                .map(|x| {
381                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
382                    let diff = val - mean;
383                    diff * diff
384                })
385                .sum();
386
387            let fourth_power_sum: f64 = data
388                .par_iter()
389                .map(|x| {
390                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
391                    let diff = val - mean;
392                    let diff_sq = diff * diff;
393                    diff_sq * diff_sq
394                })
395                .sum();
396
397            (variance_sum, fourth_power_sum)
398        };
399
400        let variance = variance_sum / len as f64;
401
402        // If variance is zero, all values are the same, kurtosis is undefined
403        if variance == 0.0 {
404            return None;
405        }
406
407        let variance_sq = variance * variance;
408        (variance_sq, fourth_power_sum)
409    };
410
411    // If variance_sq is zero, all values are the same, kurtosis is undefined
412    if variance_sq == 0.0 {
413        return None;
414    }
415
416    let n = len as f64;
417
418    // Sample excess kurtosis formula:
419    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
420    let kurtosis = (n * (n + 1.0) * fourth_power_sum)
421        / ((n - 1.0) * (n - 2.0) * (n - 3.0) * variance_sq)
422        - 3.0 * (n - 1.0) * (n - 1.0) / ((n - 2.0) * (n - 3.0));
423
424    Some(kurtosis)
425}
426
427fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
428where
429    T: PartialOrd + ToPrimitive,
430    V: PartialOrd + ToPrimitive,
431{
432    let len = data.len();
433
434    if len == 0 {
435        return None;
436    }
437
438    let value_f64 = value.to_f64()?;
439
440    // Binary search to find the position where value would be inserted
441    // This gives us the number of values <= value
442    let count_leq = data.binary_search_by(|x| {
443        x.0.to_f64()
444            .unwrap_or(f64::NAN)
445            .partial_cmp(&value_f64)
446            .unwrap_or(std::cmp::Ordering::Less)
447    });
448
449    let count = match count_leq {
450        Ok(idx) => {
451            // Value found at idx, but we need to count all equal values
452            let mut count = idx + 1;
453            // Count all equal values after idx
454            for x in data.iter().skip(idx + 1) {
455                if let Some(x_val) = x.0.to_f64() {
456                    if (x_val - value_f64).abs() < 1e-9 {
457                        count += 1;
458                    } else {
459                        break;
460                    }
461                } else {
462                    break;
463                }
464            }
465            count
466        }
467        Err(idx) => idx, // Number of values less than value
468    };
469
470    // Percentile rank = (count / n) * 100
471    Some((count as f64 / len as f64) * 100.0)
472}
473
474fn atkinson_on_sorted<T>(
475    data: &[Partial<T>],
476    epsilon: f64,
477    precalc_mean: Option<f64>,
478    precalc_geometric_sum: Option<f64>,
479) -> Option<f64>
480where
481    T: Sync + PartialOrd + ToPrimitive,
482{
483    let len = data.len();
484
485    // Early return for empty data
486    if len == 0 {
487        return None;
488    }
489
490    // Single element case: perfect equality, Atkinson = 0
491    if len == 1 {
492        return Some(0.0);
493    }
494
495    // Epsilon must be non-negative
496    if epsilon < 0.0 {
497        return None;
498    }
499
500    // Use pre-calculated mean if provided, otherwise compute it
501    let mean = if let Some(precalc) = precalc_mean {
502        precalc
503    } else {
504        let sum: f64 = if len < PARALLEL_THRESHOLD {
505            let mut sum = 0.0;
506            for x in data {
507                sum += x.0.to_f64()?;
508            }
509            sum
510        } else {
511            data.par_iter()
512                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
513                .sum()
514        };
515        sum / len as f64
516    };
517
518    // If mean is zero, Atkinson is undefined
519    if mean == 0.0 {
520        return None;
521    }
522
523    // Handle special case: epsilon = 1 (uses geometric mean)
524    if (epsilon - 1.0).abs() < 1e-10 {
525        // A_1 = 1 - (geometric_mean / mean)
526        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
527            precalc
528        } else if len < PARALLEL_THRESHOLD {
529            let mut sum = 0.0;
530            for x in data {
531                let val = x.0.to_f64()?;
532                if val <= 0.0 {
533                    // Geometric mean undefined for non-positive values
534                    return None;
535                }
536                sum += val.ln();
537            }
538            sum
539        } else {
540            data.par_iter()
541                .map(|x| {
542                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
543                    if val <= 0.0 {
544                        return f64::NAN;
545                    }
546                    val.ln()
547                })
548                .sum()
549        };
550
551        if geometric_sum.is_nan() {
552            return None;
553        }
554
555        let geometric_mean = (geometric_sum / len as f64).exp();
556        return Some(1.0 - geometric_mean / mean);
557    }
558
559    // General case: epsilon != 1
560    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
561    let exponent = 1.0 - epsilon;
562
563    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
564        let mut sum = 0.0;
565        for x in data {
566            let val = x.0.to_f64()?;
567            if val < 0.0 {
568                // Negative values with non-integer exponent are undefined
569                return None;
570            }
571            let ratio = val / mean;
572            sum += ratio.powf(exponent);
573        }
574        sum
575    } else {
576        data.par_iter()
577            .map(|x| {
578                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
579                if val < 0.0 {
580                    return f64::NAN;
581                }
582                let ratio = val / mean;
583                ratio.powf(exponent)
584            })
585            .sum()
586    };
587
588    if sum_powered.is_nan() || sum_powered <= 0.0 {
589        return None;
590    }
591
592    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
593    Some(atkinson)
594}
595
596/// Selection algorithm to find the k-th smallest element in O(n) average time.
597/// This is an implementation of quickselect algorithm.
598fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
599where
600    T: PartialOrd,
601{
602    if data.is_empty() || k >= data.len() {
603        return None;
604    }
605
606    let mut left = 0;
607    let mut right = data.len() - 1;
608
609    loop {
610        if left == right {
611            return Some(&data[left].0);
612        }
613
614        // Use median-of-three pivot selection for better performance
615        let pivot_idx = median_of_three_pivot(data, left, right);
616        let pivot_idx = partition(data, left, right, pivot_idx);
617
618        match k.cmp(&pivot_idx) {
619            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
620            std::cmp::Ordering::Less => right = pivot_idx - 1,
621            std::cmp::Ordering::Greater => left = pivot_idx + 1,
622        }
623    }
624}
625
626/// Zero-copy selection algorithm that works with indices instead of copying data.
627/// This avoids the overhead of cloning data elements.
628fn quickselect_by_index<'a, T>(
629    data: &'a [Partial<T>],
630    indices: &mut [usize],
631    k: usize,
632) -> Option<&'a T>
633where
634    T: PartialOrd,
635{
636    if data.is_empty() || indices.is_empty() || k >= indices.len() {
637        return None;
638    }
639
640    let mut left = 0;
641    let mut right = indices.len() - 1;
642
643    loop {
644        if left == right {
645            return Some(&data[indices[left]].0);
646        }
647
648        // Use median-of-three pivot selection for better performance
649        let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
650        let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
651
652        match k.cmp(&pivot_idx) {
653            std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
654            std::cmp::Ordering::Less => right = pivot_idx - 1,
655            std::cmp::Ordering::Greater => left = pivot_idx + 1,
656        }
657    }
658}
659
660/// Select the median of three elements as pivot for better quickselect performance
661fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
662where
663    T: PartialOrd,
664{
665    let mid = left + (right - left) / 2;
666
667    if data[left] <= data[mid] {
668        if data[mid] <= data[right] {
669            mid
670        } else if data[left] <= data[right] {
671            right
672        } else {
673            left
674        }
675    } else if data[left] <= data[right] {
676        left
677    } else if data[mid] <= data[right] {
678        right
679    } else {
680        mid
681    }
682}
683
684/// Select the median of three elements as pivot using indices
685fn median_of_three_pivot_by_index<T>(
686    data: &[Partial<T>],
687    indices: &[usize],
688    left: usize,
689    right: usize,
690) -> usize
691where
692    T: PartialOrd,
693{
694    let mid = left + (right - left) / 2;
695
696    if data[indices[left]] <= data[indices[mid]] {
697        if data[indices[mid]] <= data[indices[right]] {
698            mid
699        } else if data[indices[left]] <= data[indices[right]] {
700            right
701        } else {
702            left
703        }
704    } else if data[indices[left]] <= data[indices[right]] {
705        left
706    } else if data[indices[mid]] <= data[indices[right]] {
707        right
708    } else {
709        mid
710    }
711}
712
713/// Partition function for quickselect
714fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
715where
716    T: PartialOrd,
717{
718    // Move pivot to end
719    data.swap(pivot_idx, right);
720    let mut store_idx = left;
721
722    // Move all elements smaller than pivot to the left
723    // Cache pivot position for better cache locality (access data[right] directly each time)
724    for i in left..right {
725        // Safety: i, store_idx, and right are guaranteed to be in bounds
726        // Compare directly with pivot at data[right] - compiler should optimize this access
727        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
728            data.swap(i, store_idx);
729            store_idx += 1;
730        }
731    }
732
733    // Move pivot to its final place
734    data.swap(store_idx, right);
735    store_idx
736}
737
738/// Partition function for quickselect using indices
739fn partition_by_index<T>(
740    data: &[Partial<T>],
741    indices: &mut [usize],
742    left: usize,
743    right: usize,
744    pivot_idx: usize,
745) -> usize
746where
747    T: PartialOrd,
748{
749    // Move pivot to end
750    indices.swap(pivot_idx, right);
751    let mut store_idx = left;
752
753    // Cache pivot index and value for better cache locality
754    // This reduces indirection: indices[right] -> data[indices[right]]
755    // Safety: right is guaranteed to be in bounds
756    let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
757    let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
758
759    // Move all elements smaller than pivot to the left
760    let mut elem_idx: usize;
761    for i in left..right {
762        // Safety: i and store_idx are guaranteed to be in bounds
763        elem_idx = unsafe { *indices.get_unchecked(i) };
764        if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
765            indices.swap(i, store_idx);
766            store_idx += 1;
767        }
768    }
769
770    // Move pivot to its final place
771    indices.swap(store_idx, right);
772    store_idx
773}
774
775// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
776// It divides data into quarters based on the length n = 4k + r where r is remainder.
777// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
778fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
779where
780    T: PartialOrd + ToPrimitive,
781{
782    let len = data.len();
783
784    // Early return for small arrays
785    match len {
786        0..=2 => return None,
787        3 => {
788            return Some(
789                // SAFETY: We know these indices are valid because len == 3
790                unsafe {
791                    (
792                        data.get_unchecked(0).0.to_f64()?,
793                        data.get_unchecked(1).0.to_f64()?,
794                        data.get_unchecked(2).0.to_f64()?,
795                    )
796                },
797            );
798        }
799        _ => {}
800    }
801
802    // Calculate k and remainder in one division
803    let k = len / 4;
804    let remainder = len % 4;
805
806    // SAFETY: All index calculations below are guaranteed to be in bounds
807    // because we've verified len >= 4 above and k is len/4
808    unsafe {
809        Some(match remainder {
810            0 => {
811                // Let data = {x_i}_{i=0..4k} where k is positive integer.
812                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
813                // If we divide data into two parts {x_i < q2} as L and
814                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
815                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
816                // =============
817                // Simply put: Length is multiple of 4 (4k)
818                // q1 = (x_{k-1} + x_k) / 2
819                // q2 = (x_{2k-1} + x_{2k}) / 2
820                // q3 = (x_{3k-1} + x_{3k}) / 2
821                let q1 = f64::midpoint(
822                    data.get_unchecked(k - 1).0.to_f64()?,
823                    data.get_unchecked(k).0.to_f64()?,
824                );
825                let q2 = f64::midpoint(
826                    data.get_unchecked(2 * k - 1).0.to_f64()?,
827                    data.get_unchecked(2 * k).0.to_f64()?,
828                );
829                let q3 = f64::midpoint(
830                    data.get_unchecked(3 * k - 1).0.to_f64()?,
831                    data.get_unchecked(3 * k).0.to_f64()?,
832                );
833                (q1, q2, q3)
834            }
835            1 => {
836                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
837                // Median q2 = x_{2k}.
838                // If we divide data other than q2 into two parts {x_i < q2}
839                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
840                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
841                // =============
842                // Simply put: Length is 4k + 1
843                // q1 = (x_{k-1} + x_k) / 2
844                // q2 = x_{2k}
845                // q3 = (x_{3k} + x_{3k+1}) / 2
846                let q1 = f64::midpoint(
847                    data.get_unchecked(k - 1).0.to_f64()?,
848                    data.get_unchecked(k).0.to_f64()?,
849                );
850                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
851                let q3 = f64::midpoint(
852                    data.get_unchecked(3 * k).0.to_f64()?,
853                    data.get_unchecked(3 * k + 1).0.to_f64()?,
854                );
855                (q1, q2, q3)
856            }
857            2 => {
858                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
859                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
860                // If we divide data into two parts {x_i < q2} as L and
861                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
862                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
863                // =============
864                // Simply put: Length is 4k + 2
865                // q1 = x_k
866                // q2 = (x_{2k} + x_{2k+1}) / 2
867                // q3 = x_{3k+1}
868                let q1 = data.get_unchecked(k).0.to_f64()?;
869                let q2 = f64::midpoint(
870                    data.get_unchecked(2 * k).0.to_f64()?,
871                    data.get_unchecked(2 * k + 1).0.to_f64()?,
872                );
873                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
874                (q1, q2, q3)
875            }
876            _ => {
877                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
878                // Median q2 = x_{2k+1}.
879                // If we divide data other than q2 into two parts {x_i < q2}
880                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
881                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
882                // =============
883                // Simply put: Length is 4k + 3
884                // q1 = x_k
885                // q2 = x_{2k+1}
886                // q3 = x_{3k+2}
887                let q1 = data.get_unchecked(k).0.to_f64()?;
888                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
889                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
890                (q1, q2, q3)
891            }
892        })
893    }
894}
895
896/// Compute quartiles using selection algorithm in O(n) time instead of O(n log n) sorting.
897/// This implementation follows Method 3 from `<https://en.wikipedia.org/wiki/Quartile>`
898fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
899where
900    T: PartialOrd + ToPrimitive,
901{
902    let len = data.len();
903
904    // Early return for small arrays
905    match len {
906        0..=2 => return None,
907        3 => {
908            // For 3 elements, we need to find the sorted order using selection
909            let min_val = quickselect(data, 0)?.to_f64()?;
910            let med_val = quickselect(data, 1)?.to_f64()?;
911            let max_val = quickselect(data, 2)?.to_f64()?;
912            return Some((min_val, med_val, max_val));
913        }
914        _ => {}
915    }
916
917    // Calculate k and remainder in one division
918    let k = len / 4;
919    let remainder = len % 4;
920
921    // Use selection algorithm to find the required order statistics
922    match remainder {
923        0 => {
924            // Length is multiple of 4 (4k)
925            // Q1 = (x_{k-1} + x_k) / 2
926            // Q2 = (x_{2k-1} + x_{2k}) / 2
927            // Q3 = (x_{3k-1} + x_{3k}) / 2
928            let q1_low = quickselect(data, k - 1)?.to_f64()?;
929            let q1_high = quickselect(data, k)?.to_f64()?;
930            let q1 = f64::midpoint(q1_low, q1_high);
931
932            let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
933            let q2_high = quickselect(data, 2 * k)?.to_f64()?;
934            let q2 = f64::midpoint(q2_low, q2_high);
935
936            let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
937            let q3_high = quickselect(data, 3 * k)?.to_f64()?;
938            let q3 = f64::midpoint(q3_low, q3_high);
939
940            Some((q1, q2, q3))
941        }
942        1 => {
943            // Length is 4k + 1
944            // Q1 = (x_{k-1} + x_k) / 2
945            // Q2 = x_{2k}
946            // Q3 = (x_{3k} + x_{3k+1}) / 2
947            let q1_low = quickselect(data, k - 1)?.to_f64()?;
948            let q1_high = quickselect(data, k)?.to_f64()?;
949            let q1 = f64::midpoint(q1_low, q1_high);
950
951            let q2 = quickselect(data, 2 * k)?.to_f64()?;
952
953            let q3_low = quickselect(data, 3 * k)?.to_f64()?;
954            let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
955            let q3 = f64::midpoint(q3_low, q3_high);
956
957            Some((q1, q2, q3))
958        }
959        2 => {
960            // Length is 4k + 2
961            // Q1 = x_k
962            // Q2 = (x_{2k} + x_{2k+1}) / 2
963            // Q3 = x_{3k+1}
964            let q1 = quickselect(data, k)?.to_f64()?;
965
966            let q2_low = quickselect(data, 2 * k)?.to_f64()?;
967            let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
968            let q2 = f64::midpoint(q2_low, q2_high);
969
970            let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
971
972            Some((q1, q2, q3))
973        }
974        _ => {
975            // Length is 4k + 3
976            // Q1 = x_k
977            // Q2 = x_{2k+1}
978            // Q3 = x_{3k+2}
979            let q1 = quickselect(data, k)?.to_f64()?;
980            let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
981            let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
982
983            Some((q1, q2, q3))
984        }
985    }
986}
987
988/// Zero-copy quartiles computation using index-based selection.
989/// This avoids copying data by working with an array of indices.
990fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
991where
992    T: PartialOrd + ToPrimitive,
993{
994    let len = data.len();
995
996    // Early return for small arrays
997    match len {
998        0..=2 => return None,
999        3 => {
1000            // For 3 elements, create indices and find sorted order
1001            let mut indices = vec![0, 1, 2];
1002            let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
1003            let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
1004            let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
1005            return Some((min_val, med_val, max_val));
1006        }
1007        _ => {}
1008    }
1009
1010    // Calculate k and remainder in one division
1011    let k = len / 4;
1012    let remainder = len % 4;
1013
1014    // Allocate indices array once and reuse it by resetting after each call
1015    let indices_template: Vec<usize> = (0..len).collect();
1016    #[allow(unused)]
1017    let mut indices: Vec<usize> = Vec::with_capacity(len);
1018
1019    // Use zero-copy selection algorithm to find the required order statistics
1020    // Note: Each quickselect_by_index call mutates the indices array, so we need
1021    // to reset it for each call to ensure correctness.
1022    match remainder {
1023        0 => {
1024            // Length is multiple of 4 (4k)
1025            indices.clone_from(&indices_template);
1026            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
1027            indices.clone_from(&indices_template);
1028            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1029            let q1 = f64::midpoint(q1_low, q1_high);
1030
1031            indices.clone_from(&indices_template);
1032            let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
1033            indices.clone_from(&indices_template);
1034            let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1035            let q2 = f64::midpoint(q2_low, q2_high);
1036
1037            indices.clone_from(&indices_template);
1038            let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
1039            indices.clone_from(&indices_template);
1040            let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
1041            let q3 = f64::midpoint(q3_low, q3_high);
1042
1043            Some((q1, q2, q3))
1044        }
1045        1 => {
1046            // Length is 4k + 1
1047            indices.clone_from(&indices_template);
1048            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
1049            indices.clone_from(&indices_template);
1050            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1051            let q1 = f64::midpoint(q1_low, q1_high);
1052
1053            indices.clone_from(&indices_template);
1054            let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1055
1056            indices.clone_from(&indices_template);
1057            let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
1058            indices.clone_from(&indices_template);
1059            let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1060            let q3 = f64::midpoint(q3_low, q3_high);
1061
1062            Some((q1, q2, q3))
1063        }
1064        2 => {
1065            // Length is 4k + 2
1066            indices.clone_from(&indices_template);
1067            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1068
1069            indices.clone_from(&indices_template);
1070            let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1071            indices.clone_from(&indices_template);
1072            let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1073            let q2 = f64::midpoint(q2_low, q2_high);
1074
1075            indices.clone_from(&indices_template);
1076            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1077
1078            Some((q1, q2, q3))
1079        }
1080        _ => {
1081            // Length is 4k + 3
1082            indices.clone_from(&indices_template);
1083            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1084            indices.clone_from(&indices_template);
1085            let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1086            indices.clone_from(&indices_template);
1087            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
1088
1089            Some((q1, q2, q3))
1090        }
1091    }
1092}
1093
1094fn mode_on_sorted<T, I>(it: I) -> Option<T>
1095where
1096    T: PartialOrd,
1097    I: Iterator<Item = T>,
1098{
1099    use std::cmp::Ordering;
1100
1101    // This approach to computing the mode works very nicely when the
1102    // number of samples is large and is close to its cardinality.
1103    // In other cases, a hashmap would be much better.
1104    // But really, how can we know this when given an arbitrary stream?
1105    // Might just switch to a hashmap to track frequencies. That would also
1106    // be generally useful for discovering the cardinality of a sample.
1107    let (mut mode, mut next) = (None, None);
1108    let (mut mode_count, mut next_count) = (0usize, 0usize);
1109    for x in it {
1110        if mode.as_ref() == Some(&x) {
1111            mode_count += 1;
1112        } else if next.as_ref() == Some(&x) {
1113            next_count += 1;
1114        } else {
1115            next = Some(x);
1116            next_count = 0;
1117        }
1118
1119        match next_count.cmp(&mode_count) {
1120            Ordering::Greater => {
1121                mode = next;
1122                mode_count = next_count;
1123                next = None;
1124                next_count = 0;
1125            }
1126            Ordering::Equal => {
1127                mode = None;
1128                mode_count = 0;
1129            }
1130            Ordering::Less => {}
1131        }
1132    }
1133    mode
1134}
1135
1136/// Computes both modes and antimodes from a sorted slice of values.
1137/// This version works with references to avoid unnecessary cloning.
1138///
1139/// # Arguments
1140///
1141/// * `data` - A sorted slice of values
1142///
1143/// # Notes
1144///
1145/// - Mode is the most frequently occurring value(s)
1146/// - Antimode is the least frequently occurring value(s)
1147/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1148/// - For empty slices, returns empty vectors and zero counts
1149/// - For single value slices, returns that value as the mode and empty antimode
1150/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1151///
1152/// # Returns
1153///
1154/// A tuple containing:
1155/// * Modes information: `(Vec<T>, usize, u32)` where:
1156///   - Vec<T>: Vector containing the mode values
1157///   - usize: Number of modes found
1158///   - u32: Frequency/count of the mode values
1159/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1160///   - Vec<T>: Vector containing up to 10 antimode values
1161///   - usize: Total number of antimodes
1162///   - u32: Frequency/count of the antimode values
1163#[allow(clippy::type_complexity)]
1164#[inline]
1165fn modes_and_antimodes_on_sorted_slice<T>(
1166    data: &[Partial<T>],
1167) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1168where
1169    T: PartialOrd + Clone,
1170{
1171    let size = data.len();
1172
1173    // Early return for empty slice
1174    if size == 0 {
1175        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1176    }
1177
1178    // Estimate capacity using integer square root of size
1179    // Integer square root using binary search (faster than floating point sqrt)
1180    let sqrt_size = if size == 0 {
1181        0
1182    } else {
1183        let mut x = size;
1184        let mut y = x.div_ceil(2);
1185        while y < x {
1186            x = y;
1187            y = usize::midpoint(x, size / x);
1188        }
1189        x
1190    };
1191    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1192
1193    let mut current_value = &data[0].0;
1194    let mut current_count = 1;
1195    let mut highest_count = 1;
1196    let mut lowest_count = u32::MAX;
1197
1198    // Count consecutive runs - optimized to reduce allocations
1199    for x in data.iter().skip(1) {
1200        if x.0 == *current_value {
1201            current_count += 1;
1202            highest_count = highest_count.max(current_count);
1203        } else {
1204            runs.push((current_value, current_count));
1205            lowest_count = lowest_count.min(current_count);
1206            current_value = &x.0;
1207            current_count = 1;
1208        }
1209    }
1210    runs.push((current_value, current_count));
1211    lowest_count = lowest_count.min(current_count);
1212
1213    // Early return if only one unique value
1214    if runs.len() == 1 {
1215        let (val, count) = runs.pop().unwrap();
1216        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1217    }
1218
1219    // Special case: if all values appear exactly once
1220    if highest_count == 1 {
1221        let antimodes_count = runs.len().min(10);
1222        let total_count = runs.len();
1223        let mut antimodes = Vec::with_capacity(antimodes_count);
1224        for (val, _) in runs.into_iter().take(antimodes_count) {
1225            antimodes.push(val.clone());
1226        }
1227        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1228        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1229    }
1230
1231    // Estimate capacities based on the number of runs
1232    // For modes: typically 1-3 modes, rarely more than 10% of runs
1233    // For antimodes: we only collect up to 10, but need to count all
1234    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1235    let estimated_antimodes = 10.min(runs.len());
1236
1237    // Collect indices first to avoid unnecessary cloning
1238    let mut modes_indices = Vec::with_capacity(estimated_modes);
1239    let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
1240    let mut mode_count = 0;
1241    let mut antimodes_count = 0;
1242    let mut antimodes_collected = 0_u32;
1243
1244    // Count and collect mode/antimode indices simultaneously
1245    for (idx, (_, count)) in runs.iter().enumerate() {
1246        if *count == highest_count {
1247            modes_indices.push(idx);
1248            mode_count += 1;
1249        }
1250        if *count == lowest_count {
1251            antimodes_count += 1;
1252            if antimodes_collected < 10 {
1253                antimodes_indices.push(idx);
1254                antimodes_collected += 1;
1255            }
1256        }
1257    }
1258
1259    // Extract values only for the indices we need, cloning only at the end
1260    let modes_result: Vec<T> = modes_indices
1261        .into_iter()
1262        .map(|idx| runs[idx].0.clone())
1263        .collect();
1264    let antimodes_result: Vec<T> = antimodes_indices
1265        .into_iter()
1266        .map(|idx| runs[idx].0.clone())
1267        .collect();
1268
1269    (
1270        (modes_result, mode_count, highest_count),
1271        (antimodes_result, antimodes_count, lowest_count),
1272    )
1273}
1274
1275/// A commutative data structure for lazily sorted sequences of data.
1276///
1277/// The sort does not occur until statistics need to be computed.
1278///
1279/// Note that this works on types that do not define a total ordering like
1280/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1281/// is returned.
1282#[allow(clippy::unsafe_derive_deserialize)]
1283#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
1284pub struct Unsorted<T> {
1285    sorted: bool,
1286    data: Vec<Partial<T>>,
1287}
1288
1289impl<T: PartialOrd> Unsorted<T> {
1290    /// Create initial empty state.
1291    #[inline]
1292    #[must_use]
1293    pub fn new() -> Unsorted<T> {
1294        Default::default()
1295    }
1296
1297    /// Add a new element to the set.
1298    #[allow(clippy::inline_always)]
1299    #[inline(always)]
1300    pub fn add(&mut self, v: T) {
1301        self.sorted = false;
1302        self.data.push(Partial(v));
1303    }
1304
1305    /// Return the number of data points.
1306    #[inline]
1307    #[must_use]
1308    pub const fn len(&self) -> usize {
1309        self.data.len()
1310    }
1311
1312    #[inline]
1313    #[must_use]
1314    pub const fn is_empty(&self) -> bool {
1315        self.data.is_empty()
1316    }
1317
1318    #[inline]
1319    fn sort(&mut self) {
1320        if !self.sorted {
1321            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1322            if self.data.len() < PARALLEL_THRESHOLD {
1323                self.data.sort_unstable();
1324            } else {
1325                self.data.par_sort_unstable();
1326            }
1327            self.sorted = true;
1328        }
1329    }
1330
1331    #[inline]
1332    const fn already_sorted(&mut self) {
1333        self.sorted = true;
1334    }
1335
1336    /// Add multiple elements efficiently
1337    #[inline]
1338    pub fn add_bulk(&mut self, values: Vec<T>) {
1339        self.sorted = false;
1340        self.data.reserve(values.len());
1341        self.data.extend(values.into_iter().map(Partial));
1342    }
1343
1344    /// Shrink capacity to fit current data
1345    #[inline]
1346    pub fn shrink_to_fit(&mut self) {
1347        self.data.shrink_to_fit();
1348    }
1349
1350    /// Create with specific capacity
1351    #[inline]
1352    #[must_use]
1353    pub fn with_capacity(capacity: usize) -> Self {
1354        Unsorted {
1355            sorted: true,
1356            data: Vec::with_capacity(capacity),
1357        }
1358    }
1359
1360    /// Add a value assuming it's greater than all existing values
1361    #[inline]
1362    pub fn push_ascending(&mut self, value: T) {
1363        if let Some(last) = self.data.last() {
1364            debug_assert!(last.0 <= value, "Value must be >= than last element");
1365        }
1366        self.data.push(Partial(value));
1367        // Data remains sorted
1368    }
1369}
1370
1371impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
1372    #[inline]
1373    /// Returns the cardinality of the data.
1374    /// Set `sorted` to `true` if the data is already sorted.
1375    /// Set `parallel_threshold` to `0` to force sequential processing.
1376    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1377    /// Set `parallel_threshold` to `2` to force parallel processing.
1378    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1379    /// greater than the default threshold of `10_000`.
1380    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1381        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1382        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1383
1384        let len = self.data.len();
1385        match len {
1386            0 => return 0,
1387            1 => return 1,
1388            _ => {}
1389        }
1390
1391        if sorted {
1392            self.already_sorted();
1393        } else {
1394            self.sort();
1395        }
1396
1397        let use_parallel = parallel_threshold != 0
1398            && (parallel_threshold == 1
1399                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1400
1401        if use_parallel {
1402            // Parallel processing using chunks
1403            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1404            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1405            let chunk_info: ChunkInfo<'_, T> = self
1406                .data
1407                .par_chunks(CHUNK_SIZE)
1408                .map(|chunk| {
1409                    // Count unique elements within this chunk
1410                    let mut count = u64::from(!chunk.is_empty());
1411                    for window in chunk.windows(2) {
1412                        // safety: windows(2) guarantees window has length 2
1413                        if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1414                            count += 1;
1415                        }
1416                    }
1417                    (count, chunk.first(), chunk.last())
1418                })
1419                .collect();
1420
1421            // Combine results, checking boundaries between chunks
1422            let mut total = 0;
1423            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1424                total += count;
1425
1426                // Check boundary with previous chunk
1427                if i > 0
1428                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1429                    && prev_last == curr_first
1430                {
1431                    total -= 1; // Deduct 1 if boundary values are equal
1432                }
1433            }
1434
1435            total
1436        } else {
1437            // Sequential processing
1438
1439            // the statement below is equivalent to:
1440            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1441            let mut count = u64::from(!self.data.is_empty());
1442
1443            for window in self.data.windows(2) {
1444                // safety: windows(2) guarantees window has length 2
1445                if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1446                    count += 1;
1447                }
1448            }
1449            count
1450        }
1451    }
1452}
1453
1454impl<T: PartialOrd + Clone> Unsorted<T> {
1455    /// Returns the mode of the data.
1456    #[inline]
1457    pub fn mode(&mut self) -> Option<T> {
1458        if self.data.is_empty() {
1459            return None;
1460        }
1461        self.sort();
1462        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1463    }
1464
1465    /// Returns the modes of the data.
1466    /// Note that there is also a `frequency::mode()` function that return one mode
1467    /// with the highest frequency. If there is a tie, it returns None.
1468    #[inline]
1469    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1470        if self.data.is_empty() {
1471            return (Vec::new(), 0, 0);
1472        }
1473        self.sort();
1474        modes_and_antimodes_on_sorted_slice(&self.data).0
1475    }
1476
1477    /// Returns the antimodes of the data.
1478    /// `antimodes_result` only returns the first 10 antimodes
1479    #[inline]
1480    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1481        if self.data.is_empty() {
1482            return (Vec::new(), 0, 0);
1483        }
1484        self.sort();
1485        modes_and_antimodes_on_sorted_slice(&self.data).1
1486    }
1487
1488    /// Returns the modes and antimodes of the data.
1489    /// `antimodes_result` only returns the first 10 antimodes
1490    #[allow(clippy::type_complexity)]
1491    #[inline]
1492    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1493        if self.data.is_empty() {
1494            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1495        }
1496        self.sort();
1497        modes_and_antimodes_on_sorted_slice(&self.data)
1498    }
1499}
1500
1501impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1502    /// Returns the median of the data.
1503    #[inline]
1504    pub fn median(&mut self) -> Option<f64> {
1505        if self.data.is_empty() {
1506            return None;
1507        }
1508        self.sort();
1509        median_on_sorted(&self.data)
1510    }
1511}
1512
1513impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1514    /// Returns the Median Absolute Deviation (MAD) of the data.
1515    #[inline]
1516    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1517        if self.data.is_empty() {
1518            return None;
1519        }
1520        if existing_median.is_none() {
1521            self.sort();
1522        }
1523        mad_on_sorted(&self.data, existing_median)
1524    }
1525}
1526
1527impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1528    /// Returns the quartiles of the data using the traditional sorting approach.
1529    ///
1530    /// This method sorts the data first and then computes quartiles.
1531    /// Time complexity: O(n log n)
1532    #[inline]
1533    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1534        if self.data.is_empty() {
1535            return None;
1536        }
1537        self.sort();
1538        quartiles_on_sorted(&self.data)
1539    }
1540}
1541
1542impl<T: PartialOrd + ToPrimitive + Sync> Unsorted<T> {
1543    /// Returns the Gini Coefficient of the data.
1544    ///
1545    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1546    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1547    /// Time complexity: O(n log n)
1548    #[inline]
1549    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1550        if self.data.is_empty() {
1551            return None;
1552        }
1553        self.sort();
1554        gini_on_sorted(&self.data, precalc_sum)
1555    }
1556
1557    /// Returns the kurtosis (excess kurtosis) of the data.
1558    ///
1559    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1560    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1561    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1562    /// Time complexity: O(n log n)
1563    #[inline]
1564    pub fn kurtosis(
1565        &mut self,
1566        precalc_mean: Option<f64>,
1567        precalc_variance: Option<f64>,
1568    ) -> Option<f64> {
1569        if self.data.is_empty() {
1570            return None;
1571        }
1572        self.sort();
1573        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1574    }
1575
1576    /// Returns the percentile rank of a value in the data.
1577    ///
1578    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1579    /// values, returns 0.0. If greater than all, returns 100.0.
1580    /// This method sorts the data first and then computes the percentile rank.
1581    /// Time complexity: O(n log n)
1582    #[inline]
1583    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1584    where
1585        V: PartialOrd + ToPrimitive,
1586    {
1587        if self.data.is_empty() {
1588            return None;
1589        }
1590        self.sort();
1591        percentile_rank_on_sorted(&self.data, &value)
1592    }
1593
1594    /// Returns the Atkinson Index of the data.
1595    ///
1596    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1597    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1598    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1599    /// This method sorts the data first and then computes the Atkinson index.
1600    /// Time complexity: O(n log n)
1601    ///
1602    /// # Arguments
1603    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1604    ///   - 0.0: No inequality aversion (returns 0)
1605    ///   - 0.5: Moderate aversion
1606    ///   - 1.0: Uses geometric mean (special case)
1607    ///   - 2.0: High aversion
1608    /// * `precalc_mean` - Optional pre-calculated mean
1609    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1610    #[inline]
1611    pub fn atkinson(
1612        &mut self,
1613        epsilon: f64,
1614        precalc_mean: Option<f64>,
1615        precalc_geometric_sum: Option<f64>,
1616    ) -> Option<f64> {
1617        if self.data.is_empty() {
1618            return None;
1619        }
1620        self.sort();
1621        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1622    }
1623}
1624
1625impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1626    /// Returns the quartiles of the data using selection algorithm.
1627    ///
1628    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1629    /// in O(n) average time complexity instead of O(n log n) sorting.
1630    /// Requires T to implement Clone to create a working copy of the data.
1631    ///
1632    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1633    /// is often slower than the sorting-based approach for small to medium datasets due to:
1634    /// - Need to find multiple order statistics (3 separate quickselect calls)
1635    /// - Overhead of copying data to avoid mutation
1636    /// - Rayon's highly optimized parallel sorting
1637    #[inline]
1638    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1639        if self.data.is_empty() {
1640            return None;
1641        }
1642        // If data is already sorted, use zero-copy approach to avoid cloning
1643        if self.sorted {
1644            return quartiles_with_zero_copy_selection(&self.data);
1645        }
1646        // Create a copy using collect to avoid mutating the original for selection algorithm
1647        let mut data_copy: Vec<Partial<T>> =
1648            self.data.iter().map(|x| Partial(x.0.clone())).collect();
1649        quartiles_with_selection(&mut data_copy)
1650    }
1651}
1652
1653impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1654    /// Returns the quartiles using zero-copy selection algorithm.
1655    ///
1656    /// This implementation avoids copying data by working with indices instead,
1657    /// providing better performance than the clone-based selection approach.
1658    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1659    #[inline]
1660    #[must_use]
1661    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1662        if self.data.is_empty() {
1663            return None;
1664        }
1665        quartiles_with_zero_copy_selection(&self.data)
1666    }
1667}
1668
1669impl<T: PartialOrd> Commute for Unsorted<T> {
1670    #[inline]
1671    fn merge(&mut self, mut v: Unsorted<T>) {
1672        if v.is_empty() {
1673            return;
1674        }
1675
1676        self.sorted = false;
1677        // we use std::mem::take to avoid unnecessary allocations
1678        self.data.extend(std::mem::take(&mut v.data));
1679    }
1680}
1681
1682impl<T: PartialOrd> Default for Unsorted<T> {
1683    #[inline]
1684    fn default() -> Unsorted<T> {
1685        Unsorted {
1686            data: Vec::with_capacity(10_000),
1687            sorted: true, // empty is sorted
1688        }
1689    }
1690}
1691
1692impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1693    #[inline]
1694    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1695        let mut v = Unsorted::new();
1696        v.extend(it);
1697        v
1698    }
1699}
1700
1701impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1702    #[inline]
1703    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1704        self.sorted = false;
1705        self.data.extend(it.into_iter().map(Partial));
1706    }
1707}
1708
1709fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1710where
1711    T: PartialOrd + Clone,
1712{
1713    let len = data.len();
1714
1715    // Early return for empty array or invalid percentiles
1716    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1717        return None;
1718    }
1719
1720    // Optimize: Check if percentiles are already sorted and unique
1721    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1722        // Single or empty percentile - no need to sort/dedup
1723        percentiles.to_vec()
1724    } else {
1725        // Check if already sorted and unique (common case)
1726        let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1727
1728        if is_sorted_unique {
1729            // Already sorted and unique, use directly without cloning
1730            percentiles.to_vec()
1731        } else {
1732            // Need to sort and dedup - use HashSet for efficient deduplication
1733            use std::collections::HashSet;
1734            let mut seen = HashSet::with_capacity(percentiles.len().min(100));
1735            let mut sorted_unique = Vec::with_capacity(percentiles.len());
1736            for &p in percentiles {
1737                if seen.insert(p) {
1738                    sorted_unique.push(p);
1739                }
1740            }
1741            sorted_unique.sort_unstable();
1742            sorted_unique
1743        }
1744    };
1745
1746    let mut results = Vec::with_capacity(unique_percentiles.len());
1747
1748    // SAFETY: All index calculations below are guaranteed to be in bounds
1749    // because we've verified len > 0 and the rank calculation ensures
1750    // the index is within bounds
1751    unsafe {
1752        for &p in &unique_percentiles {
1753            // Calculate the ordinal rank using nearest-rank method
1754            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1755            // n = ⌈(P/100) × N⌉
1756            #[allow(clippy::cast_sign_loss)]
1757            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1758
1759            // Convert to 0-based index
1760            let idx = rank.saturating_sub(1);
1761
1762            // Get the value at that rank and extract the inner value
1763            results.push(data.get_unchecked(idx).0.clone());
1764        }
1765    }
1766
1767    Some(results)
1768}
1769
1770impl<T: PartialOrd + Clone> Unsorted<T> {
1771    /// Returns the requested percentiles of the data.
1772    ///
1773    /// Uses the nearest-rank method to compute percentiles.
1774    /// Each returned value is an actual value from the dataset.
1775    ///
1776    /// # Arguments
1777    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1778    ///
1779    /// # Returns
1780    /// * `None` if the data is empty or if any percentile is > 100
1781    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1782    ///
1783    /// # Example
1784    /// ```
1785    /// use stats::Unsorted;
1786    /// let mut data = Unsorted::new();
1787    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1788    /// let percentiles = vec![25, 50, 75];
1789    /// let results = data.custom_percentiles(&percentiles).unwrap();
1790    /// assert_eq!(results, vec![3, 5, 8]);
1791    /// ```
1792    #[inline]
1793    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1794        if self.data.is_empty() {
1795            return None;
1796        }
1797        self.sort();
1798        custom_percentiles_on_sorted(&self.data, percentiles)
1799    }
1800}
1801
1802#[cfg(test)]
1803mod test {
1804    use super::*;
1805
1806    #[test]
1807    fn test_cardinality_empty() {
1808        let mut unsorted: Unsorted<i32> = Unsorted::new();
1809        assert_eq!(unsorted.cardinality(false, 1), 0);
1810    }
1811
1812    #[test]
1813    fn test_cardinality_single_element() {
1814        let mut unsorted = Unsorted::new();
1815        unsorted.add(5);
1816        assert_eq!(unsorted.cardinality(false, 1), 1);
1817    }
1818
1819    #[test]
1820    fn test_cardinality_unique_elements() {
1821        let mut unsorted = Unsorted::new();
1822        unsorted.extend(vec![1, 2, 3, 4, 5]);
1823        assert_eq!(unsorted.cardinality(false, 1), 5);
1824    }
1825
1826    #[test]
1827    fn test_cardinality_duplicate_elements() {
1828        let mut unsorted = Unsorted::new();
1829        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1830        assert_eq!(unsorted.cardinality(false, 1), 4);
1831    }
1832
1833    #[test]
1834    fn test_cardinality_all_same() {
1835        let mut unsorted = Unsorted::new();
1836        unsorted.extend(vec![1; 100]);
1837        assert_eq!(unsorted.cardinality(false, 1), 1);
1838    }
1839
1840    #[test]
1841    fn test_cardinality_large_range() {
1842        let mut unsorted = Unsorted::new();
1843        unsorted.extend(0..1_000_000);
1844        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1845    }
1846
1847    #[test]
1848    fn test_cardinality_large_range_sequential() {
1849        let mut unsorted = Unsorted::new();
1850        unsorted.extend(0..1_000_000);
1851        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1852    }
1853
1854    #[test]
1855    fn test_cardinality_presorted() {
1856        let mut unsorted = Unsorted::new();
1857        unsorted.extend(vec![1, 2, 3, 4, 5]);
1858        unsorted.sort();
1859        assert_eq!(unsorted.cardinality(true, 1), 5);
1860    }
1861
1862    #[test]
1863    fn test_cardinality_float() {
1864        let mut unsorted = Unsorted::new();
1865        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1866        assert_eq!(unsorted.cardinality(false, 1), 4);
1867    }
1868
1869    #[test]
1870    fn test_cardinality_string() {
1871        let mut unsorted = Unsorted::new();
1872        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1873        assert_eq!(unsorted.cardinality(false, 1), 3);
1874    }
1875
1876    #[test]
1877    fn test_quartiles_selection_vs_sorted() {
1878        // Test that selection-based quartiles gives same results as sorting-based
1879        let test_cases = vec![
1880            vec![3, 5, 7, 9],
1881            vec![3, 5, 7],
1882            vec![1, 2, 7, 11],
1883            vec![3, 5, 7, 9, 12],
1884            vec![2, 2, 3, 8, 10],
1885            vec![3, 5, 7, 9, 12, 20],
1886            vec![0, 2, 4, 8, 10, 11],
1887            vec![3, 5, 7, 9, 12, 20, 21],
1888            vec![1, 5, 6, 6, 7, 10, 19],
1889        ];
1890
1891        for test_case in test_cases {
1892            let mut unsorted1 = Unsorted::new();
1893            let mut unsorted2 = Unsorted::new();
1894            let mut unsorted3 = Unsorted::new();
1895            unsorted1.extend(test_case.clone());
1896            unsorted2.extend(test_case.clone());
1897            unsorted3.extend(test_case.clone());
1898
1899            let result_sorted = unsorted1.quartiles();
1900            let result_selection = unsorted2.quartiles_with_selection();
1901            let result_zero_copy = unsorted3.quartiles_zero_copy();
1902
1903            assert_eq!(
1904                result_sorted, result_selection,
1905                "Selection mismatch for test case: {:?}",
1906                test_case
1907            );
1908            assert_eq!(
1909                result_sorted, result_zero_copy,
1910                "Zero-copy mismatch for test case: {:?}",
1911                test_case
1912            );
1913        }
1914    }
1915
1916    #[test]
1917    fn test_quartiles_with_selection_small() {
1918        // Test edge cases for selection-based quartiles
1919        let mut unsorted: Unsorted<i32> = Unsorted::new();
1920        assert_eq!(unsorted.quartiles_with_selection(), None);
1921
1922        let mut unsorted = Unsorted::new();
1923        unsorted.extend(vec![1, 2]);
1924        assert_eq!(unsorted.quartiles_with_selection(), None);
1925
1926        let mut unsorted = Unsorted::new();
1927        unsorted.extend(vec![1, 2, 3]);
1928        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1929    }
1930
1931    #[test]
1932    fn test_quickselect() {
1933        let data = vec![
1934            Partial(3),
1935            Partial(1),
1936            Partial(4),
1937            Partial(1),
1938            Partial(5),
1939            Partial(9),
1940            Partial(2),
1941            Partial(6),
1942        ];
1943
1944        // Test finding different positions
1945        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1946        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1947        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1948
1949        // Test edge cases
1950        let mut empty: Vec<Partial<i32>> = vec![];
1951        assert_eq!(quickselect(&mut empty, 0), None);
1952
1953        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1954        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1955    }
1956
1957    #[test]
1958    fn median_stream() {
1959        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1960        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1961    }
1962
1963    #[test]
1964    fn mad_stream() {
1965        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1966        assert_eq!(
1967            mad(
1968                vec![
1969                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1970                    50, 61, 39, 88, 5, 13, 64
1971                ]
1972                .into_iter(),
1973                None
1974            ),
1975            Some(16.0)
1976        );
1977    }
1978
1979    #[test]
1980    fn mad_stream_precalc_median() {
1981        let data = vec![3usize, 5, 7, 9].into_iter();
1982        let median1 = median(data.clone());
1983        assert_eq!(mad(data, median1), Some(2.0));
1984
1985        let data2 = vec![
1986            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1987            39, 88, 5, 13, 64,
1988        ]
1989        .into_iter();
1990        let median2 = median(data2.clone());
1991        assert_eq!(mad(data2, median2), Some(16.0));
1992    }
1993
1994    #[test]
1995    fn mode_stream() {
1996        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1997        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1998        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1999        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
2000        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
2001    }
2002
2003    #[test]
2004    fn median_floats() {
2005        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
2006        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
2007    }
2008
2009    #[test]
2010    fn mode_floats() {
2011        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
2012        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
2013        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
2014        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
2015        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
2016    }
2017
2018    #[test]
2019    fn modes_stream() {
2020        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
2021        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
2022        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
2023        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
2024        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
2025        let vec: Vec<u32> = vec![];
2026        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
2027    }
2028
2029    #[test]
2030    fn modes_floats() {
2031        assert_eq!(
2032            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2033            (vec![], 0, 0)
2034        );
2035        assert_eq!(
2036            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2037            (vec![3.0], 1, 4)
2038        );
2039        assert_eq!(
2040            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2041            (vec![3.0, 4.0], 2, 2)
2042        );
2043        assert_eq!(
2044            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2045            (vec![1.0, 3.0], 2, 2)
2046        );
2047    }
2048
2049    #[test]
2050    fn antimodes_stream() {
2051        assert_eq!(
2052            antimodes(vec![3usize, 5, 7, 9].into_iter()),
2053            (vec![3, 5, 7, 9], 4, 1)
2054        );
2055        assert_eq!(
2056            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
2057            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
2058        );
2059        assert_eq!(
2060            antimodes(vec![1usize, 3, 3, 3].into_iter()),
2061            (vec![1], 1, 1)
2062        );
2063        assert_eq!(
2064            antimodes(vec![3usize, 3, 4, 4].into_iter()),
2065            (vec![3, 4], 2, 2)
2066        );
2067        assert_eq!(
2068            antimodes(
2069                vec![
2070                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
2071                    14, 14, 15, 15
2072                ]
2073                .into_iter()
2074            ),
2075            // we only show the first 10 of the 13 antimodes
2076            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2077        );
2078        assert_eq!(
2079            antimodes(
2080                vec![
2081                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2082                    14, 14, 15, 15
2083                ]
2084                .into_iter()
2085            ),
2086            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2087        );
2088        assert_eq!(
2089            antimodes(vec![3usize, 3, 3, 4].into_iter()),
2090            (vec![4], 1, 1)
2091        );
2092        assert_eq!(
2093            antimodes(vec![4usize, 3, 3, 3].into_iter()),
2094            (vec![4], 1, 1)
2095        );
2096        assert_eq!(
2097            antimodes(vec![1usize, 1, 2, 2].into_iter()),
2098            (vec![1, 2], 2, 2)
2099        );
2100        let vec: Vec<u32> = vec![];
2101        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2102    }
2103
2104    #[test]
2105    fn antimodes_floats() {
2106        assert_eq!(
2107            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2108            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2109        );
2110        assert_eq!(
2111            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2112            (vec![], 0, 0)
2113        );
2114        assert_eq!(
2115            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2116            (vec![3.0, 4.0], 2, 2)
2117        );
2118        assert_eq!(
2119            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2120            (vec![2.0], 1, 1)
2121        );
2122    }
2123
2124    #[test]
2125    fn test_custom_percentiles() {
2126        // Test with integers
2127        let mut unsorted: Unsorted<i32> = Unsorted::new();
2128        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2129
2130        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2131        assert_eq!(result, vec![3, 6, 9]);
2132
2133        // Test with strings
2134        let mut str_data = Unsorted::new();
2135        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2136        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2137        assert_eq!(result, vec!["a", "b", "c", "d"]);
2138
2139        // Test with chars
2140        let mut char_data = Unsorted::new();
2141        char_data.extend('a'..='e');
2142        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2143        assert_eq!(result, vec!['b', 'c', 'd']);
2144
2145        // Test with floats
2146        let mut float_data = Unsorted::new();
2147        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2148        let result = float_data
2149            .custom_percentiles(&[10, 30, 50, 70, 90])
2150            .unwrap();
2151        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2152
2153        // Test with empty percentiles array
2154        let result = float_data.custom_percentiles(&[]).unwrap();
2155        assert_eq!(result, Vec::<f64>::new());
2156
2157        // Test with duplicate percentiles
2158        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2159        assert_eq!(result, vec![5.5]);
2160
2161        // Test with extreme percentiles
2162        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2163        assert_eq!(result, vec![1.1, 9.9]);
2164
2165        // Test with unsorted percentiles
2166        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2167        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2168
2169        // Test with single element
2170        let mut single = Unsorted::new();
2171        single.add(42);
2172        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2173        assert_eq!(result, vec![42, 42, 42]);
2174    }
2175
2176    #[test]
2177    fn quartiles_stream() {
2178        assert_eq!(
2179            quartiles(vec![3usize, 5, 7].into_iter()),
2180            Some((3., 5., 7.))
2181        );
2182        assert_eq!(
2183            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2184            Some((4., 6., 8.))
2185        );
2186        assert_eq!(
2187            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2188            Some((1.5, 4.5, 9.))
2189        );
2190        assert_eq!(
2191            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2192            Some((4., 7., 10.5))
2193        );
2194        assert_eq!(
2195            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2196            Some((2., 3., 9.))
2197        );
2198        assert_eq!(
2199            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2200            Some((5., 8., 12.))
2201        );
2202        assert_eq!(
2203            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2204            Some((2., 6., 10.))
2205        );
2206        assert_eq!(
2207            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2208            Some((5., 9., 20.))
2209        );
2210        assert_eq!(
2211            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2212            Some((5., 6., 10.))
2213        );
2214    }
2215
2216    #[test]
2217    fn quartiles_floats() {
2218        assert_eq!(
2219            quartiles(vec![3_f64, 5., 7.].into_iter()),
2220            Some((3., 5., 7.))
2221        );
2222        assert_eq!(
2223            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2224            Some((4., 6., 8.))
2225        );
2226        assert_eq!(
2227            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2228            Some((4., 7., 10.5))
2229        );
2230        assert_eq!(
2231            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2232            Some((5., 8., 12.))
2233        );
2234        assert_eq!(
2235            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2236            Some((5., 9., 20.))
2237        );
2238    }
2239
2240    #[test]
2241    fn test_quartiles_zero_copy_small() {
2242        // Test edge cases for zero-copy quartiles
2243        let unsorted: Unsorted<i32> = Unsorted::new();
2244        assert_eq!(unsorted.quartiles_zero_copy(), None);
2245
2246        let mut unsorted = Unsorted::new();
2247        unsorted.extend(vec![1, 2]);
2248        assert_eq!(unsorted.quartiles_zero_copy(), None);
2249
2250        let mut unsorted = Unsorted::new();
2251        unsorted.extend(vec![1, 2, 3]);
2252        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2253
2254        // Test larger case
2255        let mut unsorted = Unsorted::new();
2256        unsorted.extend(vec![3, 5, 7, 9]);
2257        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2258    }
2259
2260    #[test]
2261    fn gini_empty() {
2262        let mut unsorted: Unsorted<i32> = Unsorted::new();
2263        assert_eq!(unsorted.gini(None), None);
2264        let empty_vec: Vec<i32> = vec![];
2265        assert_eq!(gini(empty_vec.into_iter(), None), None);
2266    }
2267
2268    #[test]
2269    fn gini_single_element() {
2270        let mut unsorted = Unsorted::new();
2271        unsorted.add(5);
2272        assert_eq!(unsorted.gini(None), Some(0.0));
2273        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2274    }
2275
2276    #[test]
2277    fn gini_perfect_equality() {
2278        // All values are the same - perfect equality, Gini = 0
2279        let mut unsorted = Unsorted::new();
2280        unsorted.extend(vec![10, 10, 10, 10, 10]);
2281        let result = unsorted.gini(None).unwrap();
2282        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2283
2284        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2285    }
2286
2287    #[test]
2288    fn gini_perfect_inequality() {
2289        // One value has everything, others have zero - perfect inequality
2290        // For [0, 0, 0, 0, 100], Gini should be close to 1
2291        let mut unsorted = Unsorted::new();
2292        unsorted.extend(vec![0, 0, 0, 0, 100]);
2293        let result = unsorted.gini(None).unwrap();
2294        // Perfect inequality should give Gini close to 1
2295        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2296        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2297    }
2298
2299    #[test]
2300    fn gini_stream() {
2301        // Test with known values
2302        // For [1, 2, 3, 4, 5]:
2303        // sum = 15
2304        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2305        // n = 5
2306        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2307        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2308        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2309        assert!(
2310            (result - expected).abs() < 1e-10,
2311            "Expected {}, got {}",
2312            expected,
2313            result
2314        );
2315    }
2316
2317    #[test]
2318    fn gini_floats() {
2319        let mut unsorted = Unsorted::new();
2320        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2321        let result = unsorted.gini(None).unwrap();
2322        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2323        assert!((result - expected).abs() < 1e-10);
2324
2325        assert!(
2326            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2327                < 1e-10
2328        );
2329    }
2330
2331    #[test]
2332    fn gini_all_zeros() {
2333        // All zeros - sum is zero, Gini is undefined
2334        let mut unsorted = Unsorted::new();
2335        unsorted.extend(vec![0, 0, 0, 0]);
2336        assert_eq!(unsorted.gini(None), None);
2337        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2338    }
2339
2340    #[test]
2341    fn gini_negative_values() {
2342        // Test with negative values (mathematically valid)
2343        let mut unsorted = Unsorted::new();
2344        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2345        let result = unsorted.gini(None);
2346        // Sum is 0, so Gini is undefined
2347        assert_eq!(result, None);
2348
2349        // Test with negative values that don't sum to zero
2350        let mut unsorted = Unsorted::new();
2351        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2352        let result = unsorted.gini(None);
2353        // Sum is 0, so Gini is undefined
2354        assert_eq!(result, None);
2355
2356        // Test with values that sum to non-zero
2357        let mut unsorted = Unsorted::new();
2358        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2359        let result = unsorted.gini(None);
2360        assert!(result.is_some());
2361    }
2362
2363    #[test]
2364    fn gini_known_cases() {
2365        // Test case: [1, 1, 1, 1, 1] - perfect equality
2366        let mut unsorted = Unsorted::new();
2367        unsorted.extend(vec![1, 1, 1, 1, 1]);
2368        let result = unsorted.gini(None).unwrap();
2369        assert!((result - 0.0).abs() < 1e-10);
2370
2371        // Test case: [0, 0, 0, 0, 1] - high inequality
2372        let mut unsorted = Unsorted::new();
2373        unsorted.extend(vec![0, 0, 0, 0, 1]);
2374        let result = unsorted.gini(None).unwrap();
2375        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2376        assert!((result - 0.8).abs() < 1e-10);
2377
2378        // Test case: [1, 2, 3] - moderate inequality
2379        let mut unsorted = Unsorted::new();
2380        unsorted.extend(vec![1, 2, 3]);
2381        let result = unsorted.gini(None).unwrap();
2382        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2383        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2384        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2385        assert!((result - expected).abs() < 1e-10);
2386    }
2387
2388    #[test]
2389    fn gini_precalc_sum() {
2390        // Test with pre-calculated sum
2391        let mut unsorted = Unsorted::new();
2392        unsorted.extend(vec![1, 2, 3, 4, 5]);
2393        let precalc_sum = Some(15.0);
2394        let result = unsorted.gini(precalc_sum).unwrap();
2395        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2396        assert!((result - expected).abs() < 1e-10);
2397
2398        // Test that pre-calculated sum gives same result
2399        let mut unsorted2 = Unsorted::new();
2400        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2401        let result2 = unsorted2.gini(None).unwrap();
2402        assert!((result - result2).abs() < 1e-10);
2403    }
2404
2405    #[test]
2406    fn gini_large_dataset() {
2407        // Test with larger dataset to exercise parallel path
2408        let data: Vec<i32> = (1..=1000).collect();
2409        let result = gini(data.iter().copied(), None);
2410        assert!(result.is_some());
2411        let gini_val = result.unwrap();
2412        // For uniform distribution, Gini should be positive but not too high
2413        assert!(gini_val > 0.0 && gini_val < 0.5);
2414    }
2415
2416    #[test]
2417    fn gini_unsorted_vs_sorted() {
2418        // Test that sorting doesn't affect result
2419        let mut unsorted1 = Unsorted::new();
2420        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2421        let result1 = unsorted1.gini(None).unwrap();
2422
2423        let mut unsorted2 = Unsorted::new();
2424        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2425        let result2 = unsorted2.gini(None).unwrap();
2426
2427        assert!((result1 - result2).abs() < 1e-10);
2428    }
2429
2430    #[test]
2431    fn gini_small_values() {
2432        // Test with very small values
2433        let mut unsorted = Unsorted::new();
2434        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2435        let result = unsorted.gini(None);
2436        assert!(result.is_some());
2437        // Should be same as [1, 2, 3, 4, 5] scaled down
2438        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2439        assert!((result.unwrap() - expected).abs() < 1e-10);
2440    }
2441
2442    #[test]
2443    fn gini_large_values() {
2444        // Test with large values
2445        let mut unsorted = Unsorted::new();
2446        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2447        let result = unsorted.gini(None);
2448        assert!(result.is_some());
2449        // Should be same as [1, 2, 3, 4, 5] scaled up
2450        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2451        assert!((result.unwrap() - expected).abs() < 1e-10);
2452    }
2453
2454    #[test]
2455    fn gini_two_elements() {
2456        // Test with exactly 2 elements
2457        let mut unsorted = Unsorted::new();
2458        unsorted.extend(vec![1, 2]);
2459        let result = unsorted.gini(None).unwrap();
2460        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2461        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2462        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2463        assert!((result - expected).abs() < 1e-10);
2464    }
2465
2466    #[test]
2467    fn gini_precalc_sum_zero() {
2468        // Test with pre-calculated sum of zero (should return None)
2469        let mut unsorted = Unsorted::new();
2470        unsorted.extend(vec![1, 2, 3, 4, 5]);
2471        let result = unsorted.gini(Some(0.0));
2472        assert_eq!(result, None);
2473    }
2474
2475    #[test]
2476    fn gini_precalc_sum_negative() {
2477        // Test with negative sum
2478        // The function only checks if sum == 0.0, not if sum < 0.0
2479        // So negative sums will still compute Gini (though mathematically questionable)
2480        let mut unsorted = Unsorted::new();
2481        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2482        let result = unsorted.gini(None);
2483        // Sum is -5, function doesn't check for negative, so it computes Gini
2484        // This is technically invalid but the function allows it
2485        assert!(result.is_some());
2486    }
2487
2488    #[test]
2489    fn gini_different_types() {
2490        // Test with different integer types
2491        let mut unsorted_u32 = Unsorted::new();
2492        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2493        let result_u32 = unsorted_u32.gini(None).unwrap();
2494
2495        let mut unsorted_i64 = Unsorted::new();
2496        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2497        let result_i64 = unsorted_i64.gini(None).unwrap();
2498
2499        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2500        assert!((result_u32 - expected).abs() < 1e-10);
2501        assert!((result_i64 - expected).abs() < 1e-10);
2502    }
2503
2504    #[test]
2505    fn gini_extreme_inequality() {
2506        // Test with extreme inequality: one very large value, many zeros
2507        let mut unsorted = Unsorted::new();
2508        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2509        let result = unsorted.gini(None).unwrap();
2510        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2511        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2512        assert!((result - 0.9).abs() < 1e-10);
2513    }
2514
2515    #[test]
2516    fn gini_duplicate_values() {
2517        // Test with many duplicate values
2518        let mut unsorted = Unsorted::new();
2519        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2520        let result = unsorted.gini(None);
2521        assert!(result.is_some());
2522        // Should be between 0 and 1
2523        let gini_val = result.unwrap();
2524        assert!(gini_val >= 0.0 && gini_val <= 1.0);
2525    }
2526
2527    #[test]
2528    fn kurtosis_empty() {
2529        let mut unsorted: Unsorted<i32> = Unsorted::new();
2530        assert_eq!(unsorted.kurtosis(None, None), None);
2531        let empty_vec: Vec<i32> = vec![];
2532        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2533    }
2534
2535    #[test]
2536    fn kurtosis_small() {
2537        // Need at least 4 elements
2538        let mut unsorted = Unsorted::new();
2539        unsorted.extend(vec![1, 2]);
2540        assert_eq!(unsorted.kurtosis(None, None), None);
2541
2542        let mut unsorted = Unsorted::new();
2543        unsorted.extend(vec![1, 2, 3]);
2544        assert_eq!(unsorted.kurtosis(None, None), None);
2545    }
2546
2547    #[test]
2548    fn kurtosis_normal_distribution() {
2549        // Normal distribution should have kurtosis close to 0
2550        let mut unsorted = Unsorted::new();
2551        unsorted.extend(vec![1, 2, 3, 4, 5]);
2552        let result = unsorted.kurtosis(None, None);
2553        assert!(result.is_some());
2554        // For small samples, kurtosis can vary significantly
2555    }
2556
2557    #[test]
2558    fn kurtosis_all_same() {
2559        // All same values - variance is 0, kurtosis undefined
2560        let mut unsorted = Unsorted::new();
2561        unsorted.extend(vec![5, 5, 5, 5]);
2562        assert_eq!(unsorted.kurtosis(None, None), None);
2563    }
2564
2565    #[test]
2566    fn kurtosis_stream() {
2567        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2568        assert!(result.is_some());
2569    }
2570
2571    #[test]
2572    fn kurtosis_precalc_mean_variance() {
2573        // Test with pre-calculated mean and variance
2574        let mut unsorted = Unsorted::new();
2575        unsorted.extend(vec![1, 2, 3, 4, 5]);
2576
2577        // Calculate mean and variance manually
2578        let mean = 3.0f64;
2579        let variance = ((1.0f64 - 3.0).powi(2)
2580            + (2.0f64 - 3.0).powi(2)
2581            + (3.0f64 - 3.0).powi(2)
2582            + (4.0f64 - 3.0).powi(2)
2583            + (5.0f64 - 3.0).powi(2))
2584            / 5.0;
2585
2586        let result = unsorted.kurtosis(Some(mean), Some(variance));
2587        assert!(result.is_some());
2588
2589        // Test that pre-calculated values give same result
2590        let mut unsorted2 = Unsorted::new();
2591        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2592        let result2 = unsorted2.kurtosis(None, None);
2593        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2594    }
2595
2596    #[test]
2597    fn kurtosis_precalc_mean_only() {
2598        // Test with pre-calculated mean only
2599        let mut unsorted = Unsorted::new();
2600        unsorted.extend(vec![1, 2, 3, 4, 5]);
2601        let mean = 3.0f64;
2602
2603        let result = unsorted.kurtosis(Some(mean), None);
2604        assert!(result.is_some());
2605
2606        // Test that pre-calculated mean gives same result
2607        let mut unsorted2 = Unsorted::new();
2608        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2609        let result2 = unsorted2.kurtosis(None, None);
2610        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2611    }
2612
2613    #[test]
2614    fn kurtosis_precalc_variance_only() {
2615        // Test with pre-calculated variance only
2616        let mut unsorted = Unsorted::new();
2617        unsorted.extend(vec![1, 2, 3, 4, 5]);
2618        let variance = ((1.0f64 - 3.0).powi(2)
2619            + (2.0f64 - 3.0).powi(2)
2620            + (3.0f64 - 3.0).powi(2)
2621            + (4.0f64 - 3.0).powi(2)
2622            + (5.0f64 - 3.0).powi(2))
2623            / 5.0;
2624
2625        let result = unsorted.kurtosis(None, Some(variance));
2626        assert!(result.is_some());
2627
2628        // Test that pre-calculated variance gives same result
2629        let mut unsorted2 = Unsorted::new();
2630        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2631        let result2 = unsorted2.kurtosis(None, None);
2632        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2633    }
2634
2635    #[test]
2636    fn kurtosis_exact_calculation() {
2637        // Test with exact calculation for [1, 2, 3, 4]
2638        // Mean = 2.5
2639        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2640        // Variance^2 = 1.5625
2641        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2642        // n = 4
2643        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2644        let mut unsorted = Unsorted::new();
2645        unsorted.extend(vec![1, 2, 3, 4]);
2646        let result = unsorted.kurtosis(None, None).unwrap();
2647        // For small samples, kurtosis can be very high
2648        assert!(result.is_finite());
2649    }
2650
2651    #[test]
2652    fn kurtosis_uniform_distribution() {
2653        // Uniform distribution should have negative excess kurtosis
2654        let mut unsorted = Unsorted::new();
2655        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2656        let result = unsorted.kurtosis(None, None).unwrap();
2657        // Uniform distribution has excess kurtosis = -1.2
2658        // But for small samples, it can vary significantly
2659        assert!(result.is_finite());
2660    }
2661
2662    #[test]
2663    fn kurtosis_large_dataset() {
2664        // Test with larger dataset to exercise parallel path
2665        let data: Vec<i32> = (1..=1000).collect();
2666        let result = kurtosis(data.iter().copied(), None, None);
2667        assert!(result.is_some());
2668        let kurt_val = result.unwrap();
2669        assert!(kurt_val.is_finite());
2670    }
2671
2672    #[test]
2673    fn kurtosis_unsorted_vs_sorted() {
2674        // Test that sorting doesn't affect result
2675        let mut unsorted1 = Unsorted::new();
2676        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2677        let result1 = unsorted1.kurtosis(None, None).unwrap();
2678
2679        let mut unsorted2 = Unsorted::new();
2680        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2681        let result2 = unsorted2.kurtosis(None, None).unwrap();
2682
2683        assert!((result1 - result2).abs() < 1e-10);
2684    }
2685
2686    #[test]
2687    fn kurtosis_minimum_size() {
2688        // Test with exactly 4 elements (minimum required)
2689        let mut unsorted = Unsorted::new();
2690        unsorted.extend(vec![1, 2, 3, 4]);
2691        let result = unsorted.kurtosis(None, None);
2692        assert!(result.is_some());
2693        assert!(result.unwrap().is_finite());
2694    }
2695
2696    #[test]
2697    fn kurtosis_heavy_tailed() {
2698        // Test with heavy-tailed distribution (outliers)
2699        let mut unsorted = Unsorted::new();
2700        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2701        let result = unsorted.kurtosis(None, None).unwrap();
2702        // Heavy tails should give positive excess kurtosis
2703        assert!(result.is_finite());
2704        // With an outlier, kurtosis should be positive
2705        assert!(result > -10.0); // Allow some variance but should be reasonable
2706    }
2707
2708    #[test]
2709    fn kurtosis_light_tailed() {
2710        // Test with light-tailed distribution (values close together)
2711        let mut unsorted = Unsorted::new();
2712        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2713        let result = unsorted.kurtosis(None, None).unwrap();
2714        // Light tails might give negative excess kurtosis
2715        assert!(result.is_finite());
2716    }
2717
2718    #[test]
2719    fn kurtosis_small_variance() {
2720        // Test with very small variance (values very close together)
2721        let mut unsorted = Unsorted::new();
2722        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2723        let result = unsorted.kurtosis(None, None);
2724        // Should still compute (variance is very small but non-zero)
2725        assert!(result.is_some());
2726        assert!(result.unwrap().is_finite());
2727    }
2728
2729    #[test]
2730    fn kurtosis_precalc_zero_variance() {
2731        // Test with pre-calculated variance of zero (should return None)
2732        let mut unsorted = Unsorted::new();
2733        unsorted.extend(vec![1, 2, 3, 4, 5]);
2734        let result = unsorted.kurtosis(None, Some(0.0));
2735        assert_eq!(result, None);
2736    }
2737
2738    #[test]
2739    fn kurtosis_precalc_negative_variance() {
2740        // Test with negative variance (invalid, but should handle gracefully)
2741        let mut unsorted = Unsorted::new();
2742        unsorted.extend(vec![1, 2, 3, 4, 5]);
2743        // Negative variance is invalid, but function should handle it
2744        let result = unsorted.kurtosis(None, Some(-1.0));
2745        // Should either return None or handle it gracefully
2746        // The function computes variance_sq = variance^2, so negative becomes positive
2747        // But this is invalid input, so behavior may vary
2748        // For now, just check it doesn't panic
2749        let _ = result;
2750    }
2751
2752    #[test]
2753    fn kurtosis_different_types() {
2754        // Test with different integer types
2755        let mut unsorted_u32 = Unsorted::new();
2756        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2757        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2758
2759        let mut unsorted_i64 = Unsorted::new();
2760        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2761        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2762
2763        assert!((result_u32 - result_i64).abs() < 1e-10);
2764    }
2765
2766    #[test]
2767    fn kurtosis_floating_point_precision() {
2768        // Test floating point precision
2769        let mut unsorted = Unsorted::new();
2770        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2771        let result = unsorted.kurtosis(None, None);
2772        assert!(result.is_some());
2773        assert!(result.unwrap().is_finite());
2774    }
2775
2776    #[test]
2777    fn kurtosis_negative_values() {
2778        // Test with negative values
2779        let mut unsorted = Unsorted::new();
2780        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2781        let result = unsorted.kurtosis(None, None);
2782        assert!(result.is_some());
2783        assert!(result.unwrap().is_finite());
2784    }
2785
2786    #[test]
2787    fn kurtosis_mixed_positive_negative() {
2788        // Test with mixed positive and negative values
2789        let mut unsorted = Unsorted::new();
2790        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2791        let result = unsorted.kurtosis(None, None);
2792        assert!(result.is_some());
2793        assert!(result.unwrap().is_finite());
2794    }
2795
2796    #[test]
2797    fn kurtosis_duplicate_values() {
2798        // Test with duplicate values (but not all same)
2799        let mut unsorted = Unsorted::new();
2800        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2801        let result = unsorted.kurtosis(None, None);
2802        assert!(result.is_some());
2803        assert!(result.unwrap().is_finite());
2804    }
2805
2806    #[test]
2807    fn kurtosis_precalc_mean_wrong() {
2808        // Test that wrong pre-calculated mean gives wrong result
2809        let mut unsorted1 = Unsorted::new();
2810        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2811        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2812
2813        let mut unsorted2 = Unsorted::new();
2814        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2815        let wrong_mean = 10.0; // Wrong mean
2816        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2817
2818        // Results should be different
2819        assert!((correct_result - wrong_result).abs() > 1e-5);
2820    }
2821
2822    #[test]
2823    fn percentile_rank_empty() {
2824        let mut unsorted: Unsorted<i32> = Unsorted::new();
2825        assert_eq!(unsorted.percentile_rank(5), None);
2826        let empty_vec: Vec<i32> = vec![];
2827        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2828    }
2829
2830    #[test]
2831    fn percentile_rank_basic() {
2832        let mut unsorted = Unsorted::new();
2833        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2834
2835        // Value less than all
2836        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2837
2838        // Value greater than all
2839        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2840
2841        // Median (5) should be around 50th percentile
2842        let rank = unsorted.percentile_rank(5).unwrap();
2843        assert!((rank - 50.0).abs() < 1.0);
2844
2845        // First value should be at 10th percentile
2846        let rank = unsorted.percentile_rank(1).unwrap();
2847        assert!((rank - 10.0).abs() < 1.0);
2848    }
2849
2850    #[test]
2851    fn percentile_rank_duplicates() {
2852        let mut unsorted = Unsorted::new();
2853        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2854
2855        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2856        let rank = unsorted.percentile_rank(2).unwrap();
2857        assert!((rank - 40.0).abs() < 1.0);
2858    }
2859
2860    #[test]
2861    fn percentile_rank_stream() {
2862        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2863        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2864    }
2865
2866    #[test]
2867    fn atkinson_empty() {
2868        let mut unsorted: Unsorted<i32> = Unsorted::new();
2869        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2870        let empty_vec: Vec<i32> = vec![];
2871        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2872    }
2873
2874    #[test]
2875    fn atkinson_single_element() {
2876        let mut unsorted = Unsorted::new();
2877        unsorted.add(5);
2878        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2879        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2880    }
2881
2882    #[test]
2883    fn atkinson_perfect_equality() {
2884        // All values the same - perfect equality, Atkinson = 0
2885        let mut unsorted = Unsorted::new();
2886        unsorted.extend(vec![10, 10, 10, 10, 10]);
2887        let result = unsorted.atkinson(1.0, None, None).unwrap();
2888        assert!((result - 0.0).abs() < 1e-10);
2889    }
2890
2891    #[test]
2892    fn atkinson_epsilon_zero() {
2893        // Epsilon = 0 means no inequality aversion, should return 0
2894        let mut unsorted = Unsorted::new();
2895        unsorted.extend(vec![1, 2, 3, 4, 5]);
2896        let result = unsorted.atkinson(0.0, None, None).unwrap();
2897        assert!((result - 0.0).abs() < 1e-10);
2898    }
2899
2900    #[test]
2901    fn atkinson_epsilon_one() {
2902        // Epsilon = 1 uses geometric mean
2903        let mut unsorted = Unsorted::new();
2904        unsorted.extend(vec![1, 2, 3, 4, 5]);
2905        let result = unsorted.atkinson(1.0, None, None);
2906        assert!(result.is_some());
2907    }
2908
2909    #[test]
2910    fn atkinson_negative_epsilon() {
2911        let mut unsorted = Unsorted::new();
2912        unsorted.extend(vec![1, 2, 3, 4, 5]);
2913        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2914    }
2915
2916    #[test]
2917    fn atkinson_zero_mean() {
2918        // If mean is zero, Atkinson is undefined
2919        let mut unsorted = Unsorted::new();
2920        unsorted.extend(vec![0, 0, 0, 0]);
2921        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2922    }
2923
2924    #[test]
2925    fn atkinson_stream() {
2926        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2927        assert!(result.is_some());
2928    }
2929
2930    #[test]
2931    fn atkinson_precalc_mean_geometric_sum() {
2932        // Test with pre-calculated mean and geometric_sum
2933        let mut unsorted = Unsorted::new();
2934        unsorted.extend(vec![1, 2, 3, 4, 5]);
2935
2936        // Calculate mean and geometric_sum manually
2937        let mean = 3.0f64;
2938        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2939
2940        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2941        assert!(result.is_some());
2942
2943        // Test that pre-calculated values give same result
2944        let mut unsorted2 = Unsorted::new();
2945        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2946        let result2 = unsorted2.atkinson(1.0, None, None);
2947        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2948    }
2949
2950    #[test]
2951    fn atkinson_precalc_mean_only() {
2952        // Test with pre-calculated mean only
2953        let mut unsorted = Unsorted::new();
2954        unsorted.extend(vec![1, 2, 3, 4, 5]);
2955        let mean = 3.0f64;
2956
2957        let result = unsorted.atkinson(1.0, Some(mean), None);
2958        assert!(result.is_some());
2959
2960        // Test that pre-calculated mean gives same result
2961        let mut unsorted2 = Unsorted::new();
2962        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2963        let result2 = unsorted2.atkinson(1.0, None, None);
2964        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2965    }
2966
2967    #[test]
2968    fn atkinson_precalc_geometric_sum_only() {
2969        // Test with pre-calculated geometric_sum only
2970        let mut unsorted = Unsorted::new();
2971        unsorted.extend(vec![1, 2, 3, 4, 5]);
2972        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2973
2974        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2975        assert!(result.is_some());
2976
2977        // Test that pre-calculated geometric_sum gives same result
2978        let mut unsorted2 = Unsorted::new();
2979        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2980        let result2 = unsorted2.atkinson(1.0, None, None);
2981        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2982    }
2983}
2984
2985#[cfg(test)]
2986mod bench {
2987    use super::*;
2988    use std::time::Instant;
2989
2990    #[test]
2991    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
2992    fn comprehensive_quartiles_benchmark() {
2993        // Test a wide range of data sizes
2994        let data_sizes = vec![
2995            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2996        ];
2997
2998        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2999
3000        for size in data_sizes {
3001            println!("--- Testing with {} elements ---", size);
3002
3003            // Test different data patterns
3004            let test_patterns = vec![
3005                ("Random", generate_random_data(size)),
3006                ("Reverse Sorted", {
3007                    let mut v = Vec::with_capacity(size);
3008                    for x in (0..size).rev() {
3009                        v.push(x as i32);
3010                    }
3011                    v
3012                }),
3013                ("Already Sorted", {
3014                    let mut v = Vec::with_capacity(size);
3015                    for x in 0..size {
3016                        v.push(x as i32);
3017                    }
3018                    v
3019                }),
3020                ("Many Duplicates", {
3021                    // Create a vector with just a few distinct values repeated many times
3022                    let mut v = Vec::with_capacity(size);
3023                    let chunk_size = size / 100;
3024                    for i in 0..100 {
3025                        v.extend(std::iter::repeat_n(i, chunk_size));
3026                    }
3027                    // Add any remaining elements
3028                    v.extend(std::iter::repeat_n(0, size - v.len()));
3029                    v
3030                }),
3031            ];
3032
3033            for (pattern_name, test_data) in test_patterns {
3034                println!("\n  Pattern: {}", pattern_name);
3035
3036                // Benchmark sorting-based approach
3037                let mut unsorted1 = Unsorted::new();
3038                unsorted1.extend(test_data.clone());
3039
3040                let start = Instant::now();
3041                let result_sorted = unsorted1.quartiles();
3042                let sorted_time = start.elapsed();
3043
3044                // Benchmark selection-based approach (with copying)
3045                let mut unsorted2 = Unsorted::new();
3046                unsorted2.extend(test_data.clone());
3047
3048                let start = Instant::now();
3049                let result_selection = unsorted2.quartiles_with_selection();
3050                let selection_time = start.elapsed();
3051
3052                // Benchmark zero-copy selection-based approach
3053                let mut unsorted3 = Unsorted::new();
3054                unsorted3.extend(test_data);
3055
3056                let start = Instant::now();
3057                let result_zero_copy = unsorted3.quartiles_zero_copy();
3058                let zero_copy_time = start.elapsed();
3059
3060                // Verify results are the same
3061                assert_eq!(result_sorted, result_selection);
3062                assert_eq!(result_sorted, result_zero_copy);
3063
3064                let selection_speedup =
3065                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3066                let zero_copy_speedup =
3067                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3068
3069                println!("    Sorting:       {:>12?}", sorted_time);
3070                println!(
3071                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3072                    selection_time, selection_speedup
3073                );
3074                println!(
3075                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3076                    zero_copy_time, zero_copy_speedup
3077                );
3078
3079                let best_algorithm =
3080                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3081                        "ZERO-COPY"
3082                    } else if selection_speedup > 1.0 {
3083                        "SELECTION"
3084                    } else {
3085                        "SORTING"
3086                    };
3087                println!("    Best: {}", best_algorithm);
3088            }
3089
3090            println!(); // Add blank line between sizes
3091        }
3092    }
3093
3094    // Generate random data for benchmarking
3095    fn generate_random_data(size: usize) -> Vec<i32> {
3096        // Simple LCG random number generator for reproducible results
3097        let mut rng = 1234567u64;
3098        let mut vec = Vec::with_capacity(size);
3099        for _ in 0..size {
3100            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3101            vec.push((rng >> 16) as i32);
3102        }
3103        vec
3104    }
3105
3106    #[test]
3107    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3108    fn find_selection_threshold() {
3109        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3110
3111        // Binary search approach to find the threshold
3112        let mut found_threshold = None;
3113        let test_sizes = vec![
3114            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3115            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3116        ];
3117
3118        for size in test_sizes {
3119            println!("Testing size: {}", size);
3120
3121            // Use random data as it's most representative of real-world scenarios
3122            let test_data = generate_random_data(size);
3123
3124            // Run multiple iterations to get average performance
3125            let iterations = 3;
3126            let mut sorting_total = 0u128;
3127            let mut selection_total = 0u128;
3128            let mut zero_copy_total = 0u128;
3129
3130            for i in 0..iterations {
3131                println!("  Iteration {}/{}", i + 1, iterations);
3132
3133                // Sorting approach
3134                let mut unsorted1 = Unsorted::new();
3135                unsorted1.extend(test_data.clone());
3136
3137                let start = Instant::now();
3138                let _result_sorted = unsorted1.quartiles();
3139                sorting_total += start.elapsed().as_nanos();
3140
3141                // Selection approach (with copying)
3142                let mut unsorted2 = Unsorted::new();
3143                unsorted2.extend(test_data.clone());
3144
3145                let start = Instant::now();
3146                let _result_selection = unsorted2.quartiles_with_selection();
3147                selection_total += start.elapsed().as_nanos();
3148
3149                // Zero-copy selection approach
3150                let mut unsorted3 = Unsorted::new();
3151                unsorted3.extend(test_data.clone());
3152
3153                let start = Instant::now();
3154                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3155                zero_copy_total += start.elapsed().as_nanos();
3156            }
3157
3158            let avg_sorting = sorting_total / iterations as u128;
3159            let avg_selection = selection_total / iterations as u128;
3160            let avg_zero_copy = zero_copy_total / iterations as u128;
3161            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3162            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3163
3164            println!(
3165                "  Average sorting:    {:>12.2}ms",
3166                avg_sorting as f64 / 1_000_000.0
3167            );
3168            println!(
3169                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3170                avg_selection as f64 / 1_000_000.0,
3171                selection_speedup
3172            );
3173            println!(
3174                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3175                avg_zero_copy as f64 / 1_000_000.0,
3176                zero_copy_speedup
3177            );
3178
3179            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3180                found_threshold = Some(size);
3181                let best_method = if zero_copy_speedup > selection_speedup {
3182                    "Zero-copy"
3183                } else {
3184                    "Selection"
3185                };
3186                println!(
3187                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3188                    best_method, size
3189                );
3190            }
3191
3192            println!();
3193        }
3194
3195        match found_threshold {
3196            Some(threshold) => println!(
3197                "🎯 Selection algorithm becomes faster at approximately {} elements",
3198                threshold
3199            ),
3200            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3201        }
3202    }
3203
3204    #[test]
3205    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3206    fn benchmark_different_data_types() {
3207        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3208
3209        let size = 5_000_000; // Use a large size where differences might be visible
3210
3211        // Test with f64 (floating point)
3212        println!("Testing with f64 data:");
3213        let float_data: Vec<f64> = generate_random_data(size)
3214            .into_iter()
3215            .map(|x| x as f64 / 1000.0)
3216            .collect();
3217
3218        let mut unsorted1 = Unsorted::new();
3219        unsorted1.extend(float_data.clone());
3220        let start = Instant::now();
3221        let _result = unsorted1.quartiles();
3222        let sorting_time = start.elapsed();
3223
3224        let mut unsorted2 = Unsorted::new();
3225        unsorted2.extend(float_data.clone());
3226        let start = Instant::now();
3227        let _result = unsorted2.quartiles_with_selection();
3228        let selection_time = start.elapsed();
3229
3230        let mut unsorted3 = Unsorted::new();
3231        unsorted3.extend(float_data);
3232        let start = Instant::now();
3233        let _result = unsorted3.quartiles_zero_copy();
3234        let zero_copy_time = start.elapsed();
3235
3236        println!("  Sorting:    {:?}", sorting_time);
3237        println!("  Selection:  {:?}", selection_time);
3238        println!("  Zero-copy:  {:?}", zero_copy_time);
3239        println!(
3240            "  Selection Speedup:  {:.2}x",
3241            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3242        );
3243        println!(
3244            "  Zero-copy Speedup:  {:.2}x\n",
3245            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3246        );
3247
3248        // Test with i64 (larger integers)
3249        println!("Testing with i64 data:");
3250        let int64_data: Vec<i64> = generate_random_data(size)
3251            .into_iter()
3252            .map(|x| x as i64 * 1000)
3253            .collect();
3254
3255        let mut unsorted1 = Unsorted::new();
3256        unsorted1.extend(int64_data.clone());
3257        let start = Instant::now();
3258        let _result = unsorted1.quartiles();
3259        let sorting_time = start.elapsed();
3260
3261        let mut unsorted2 = Unsorted::new();
3262        unsorted2.extend(int64_data.clone());
3263        let start = Instant::now();
3264        let _result = unsorted2.quartiles_with_selection();
3265        let selection_time = start.elapsed();
3266
3267        let mut unsorted3 = Unsorted::new();
3268        unsorted3.extend(int64_data);
3269        let start = Instant::now();
3270        let _result = unsorted3.quartiles_zero_copy();
3271        let zero_copy_time = start.elapsed();
3272
3273        println!("  Sorting:    {:?}", sorting_time);
3274        println!("  Selection:  {:?}", selection_time);
3275        println!("  Zero-copy:  {:?}", zero_copy_time);
3276        println!(
3277            "  Selection Speedup:  {:.2}x",
3278            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3279        );
3280        println!(
3281            "  Zero-copy Speedup:  {:.2}x",
3282            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3283        );
3284    }
3285}
stats/unsorted.rs

stats/
unsorted.rs