stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12/// Compute the exact median on a stream of data.
13///
14/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
15pub fn median<I>(it: I) -> Option<f64>
16where
17    I: Iterator,
18    <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20    it.collect::<Unsorted<_>>().median()
21}
22
23/// Compute the median absolute deviation (MAD) on a stream of data.
24pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26    I: Iterator,
27    <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29    it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
33///
34/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
35pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37    I: Iterator,
38    <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40    it.collect::<Unsorted<_>>().quartiles()
41}
42
43/// Compute the exact mode on a stream of data.
44///
45/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
46///
47/// If the data does not have a mode, then `None` is returned.
48pub fn mode<T, I>(it: I) -> Option<T>
49where
50    T: PartialOrd + Clone,
51    I: Iterator<Item = T>,
52{
53    it.collect::<Unsorted<T>>().mode()
54}
55
56/// Compute the modes on a stream of data.
57///
58/// If there is a single mode, then only that value is returned in the `Vec`
59/// however, if there are multiple values tied for occurring the most amount of times
60/// those values are returned.
61///
62/// ## Example
63/// ```
64/// use stats;
65///
66/// let vals = vec![1, 1, 2, 2, 3];
67///
68/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
69/// ```
70/// This has time complexity `O(n)`
71///
72/// If the data does not have a mode, then an empty `Vec` is returned.
73pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75    T: PartialOrd + Clone,
76    I: Iterator<Item = T>,
77{
78    it.collect::<Unsorted<T>>().modes()
79}
80
81/// Compute the antimodes on a stream of data.
82///
83/// Antimode is the least frequent non-zero score.
84///
85/// If there is a single antimode, then only that value is returned in the `Vec`
86/// however, if there are multiple values tied for occurring the least amount of times
87/// those values are returned.
88///
89/// Only the first 10 antimodes are returned to prevent returning the whole set
90/// when cardinality = number of records (i.e. all unique values).
91///
92/// ## Example
93/// ```
94/// use stats;
95///
96/// let vals = vec![1, 1, 2, 2, 3];
97///
98/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
99/// ```
100/// This has time complexity `O(n)`
101///
102/// If the data does not have an antimode, then an empty `Vec` is returned.
103pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105    T: PartialOrd + Clone,
106    I: Iterator<Item = T>,
107{
108    let (antimodes_result, antimodes_count, antimodes_occurrences) =
109        it.collect::<Unsorted<T>>().antimodes();
110    (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113/// Compute the Gini Coefficient on a stream of data.
114///
115/// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
116/// to 1 (perfect inequality).
117///
118/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
119pub fn gini<I>(it: I, precalc_sum: Option<f64>) -> Option<f64>
120where
121    I: Iterator,
122    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
123{
124    it.collect::<Unsorted<_>>().gini(precalc_sum)
125}
126
127/// Compute the kurtosis (excess kurtosis) on a stream of data.
128///
129/// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
130/// where 0 indicates a normal distribution, positive values indicate heavy tails, and
131/// negative values indicate light tails.
132///
133/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
134pub fn kurtosis<I>(it: I, precalc_mean: Option<f64>, precalc_variance: Option<f64>) -> Option<f64>
135where
136    I: Iterator,
137    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
138{
139    it.collect::<Unsorted<_>>()
140        .kurtosis(precalc_mean, precalc_variance)
141}
142
143/// Compute the percentile rank of a value on a stream of data.
144///
145/// Returns the percentile rank (0-100) of the given value in the distribution.
146/// If the value is less than all values, returns 0.0. If greater than all, returns 100.0.
147///
148/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
149pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
150where
151    I: Iterator,
152    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
153    V: PartialOrd + ToPrimitive,
154{
155    it.collect::<Unsorted<_>>().percentile_rank(value)
156}
157
158/// Compute the Atkinson Index on a stream of data.
159///
160/// The Atkinson Index measures inequality with an inequality aversion parameter ε.
161/// It ranges from 0 (perfect equality) to 1 (perfect inequality).
162/// Higher ε values give more weight to inequality at the lower end of the distribution.
163///
164/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
165pub fn atkinson<I>(
166    it: I,
167    epsilon: f64,
168    precalc_mean: Option<f64>,
169    precalc_geometric_sum: Option<f64>,
170) -> Option<f64>
171where
172    I: Iterator,
173    <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
174{
175    it.collect::<Unsorted<_>>()
176        .atkinson(epsilon, precalc_mean, precalc_geometric_sum)
177}
178
179fn median_on_sorted<T>(data: &[T]) -> Option<f64>
180where
181    T: PartialOrd + ToPrimitive,
182{
183    Some(match data.len() {
184        // Empty slice case - return None early
185        0 => return None,
186        // Single element case - return that element converted to f64
187        1 => data.first()?.to_f64()?,
188        // Even length case - average the two middle elements
189        len if len.is_multiple_of(2) => {
190            let idx = len / 2;
191            // Safety: we know these indices are valid because we checked len is even and non-zero,
192            // so idx-1 and idx are valid indices into data
193            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
194            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
195            f64::midpoint(v1, v2)
196        }
197        // Odd length case - return the middle element
198        // Safety: we know the index is within bounds
199        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
200    })
201}
202
203fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
204where
205    T: Sync + PartialOrd + ToPrimitive,
206{
207    if data.is_empty() {
208        return None;
209    }
210    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
211
212    // Use adaptive parallel processing based on data size
213    let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
214        // Sequential processing for small datasets
215        let mut vec = Vec::with_capacity(data.len());
216        for x in data {
217            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
218            vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
219        }
220        vec
221    } else {
222        // Parallel processing for large datasets
223        data.par_iter()
224            // SAFETY: to_f64() always returns Some for standard numeric types
225            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
226            .collect()
227    };
228
229    // Use adaptive sorting based on size
230    if abs_diff_vec.len() < PARALLEL_THRESHOLD {
231        // SAFETY: partial_cmp on non-NaN f64 (from abs()) always returns Some
232        abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
233    } else {
234        // SAFETY: partial_cmp on non-NaN f64 (from abs()) always returns Some
235        abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
236    }
237    median_on_sorted(&abs_diff_vec)
238}
239
240fn gini_on_sorted<T>(data: &[Partial<T>], precalc_sum: Option<f64>) -> Option<f64>
241where
242    T: Sync + PartialOrd + ToPrimitive,
243{
244    let len = data.len();
245
246    // Early return for empty data
247    if len == 0 {
248        return None;
249    }
250
251    // Single element case: perfect equality, Gini = 0
252    if len == 1 {
253        return Some(0.0);
254    }
255
256    // Use pre-calculated sum if provided, otherwise compute it
257    let sum = if let Some(precalc) = precalc_sum {
258        precalc
259    } else if len < PARALLEL_THRESHOLD {
260        let mut sum = 0.0;
261        for x in data {
262            // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
263            sum += unsafe { x.0.to_f64().unwrap_unchecked() };
264        }
265        sum
266    } else {
267        data.par_iter()
268            // SAFETY: to_f64() always returns Some for standard numeric types
269            .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
270            .sum()
271    };
272
273    // Compute weighted sum
274    let weighted_sum = if len < PARALLEL_THRESHOLD {
275        let mut weighted_sum = 0.0;
276        for (i, x) in data.iter().enumerate() {
277            // SAFETY: to_f64() always returns Some for standard numeric types
278            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
279            weighted_sum += (i + 1) as f64 * val;
280        }
281        weighted_sum
282    } else {
283        // Compute weighted sum in parallel using enumerate to get indices
284        data.par_iter()
285            .enumerate()
286            .map(|(i, x)| {
287                // SAFETY: to_f64() always returns Some for standard numeric types
288                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
289                (i + 1) as f64 * val
290            })
291            .sum()
292    };
293
294    // If sum is zero, Gini is undefined
295    if sum == 0.0 {
296        return None;
297    }
298
299    // Compute Gini coefficient using the formula:
300    // G = (2 * Σ(i * y_i)) / (n * Σ(y_i)) - (n + 1) / n
301    // where i is 1-indexed rank and y_i are sorted values
302    let n = len as f64;
303    let gini = (2.0 * weighted_sum) / (n * sum) - (n + 1.0) / n;
304
305    Some(gini)
306}
307
308fn kurtosis_on_sorted<T>(
309    data: &[Partial<T>],
310    precalc_mean: Option<f64>,
311    precalc_variance: Option<f64>,
312) -> Option<f64>
313where
314    T: Sync + PartialOrd + ToPrimitive,
315{
316    let len = data.len();
317
318    // Need at least 4 elements for meaningful kurtosis
319    if len < 4 {
320        return None;
321    }
322
323    // Use pre-calculated mean if provided, otherwise compute it
324    let mean = if let Some(precalc) = precalc_mean {
325        precalc
326    } else {
327        let sum: f64 = if len < PARALLEL_THRESHOLD {
328            let mut sum = 0.0;
329            for x in data {
330                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
331                sum += unsafe { x.0.to_f64().unwrap_unchecked() };
332            }
333            sum
334        } else {
335            data.par_iter()
336                // SAFETY: to_f64() always returns Some for standard numeric types
337                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
338                .sum()
339        };
340        sum / len as f64
341    };
342
343    // Compute variance_sq and fourth_power_sum
344    // If variance is provided, we can compute variance_sq directly (variance_sq = variance^2)
345    // Otherwise, we need to compute variance from the data
346    let (variance_sq, fourth_power_sum) = if let Some(variance) = precalc_variance {
347        // Use pre-calculated variance: variance_sq = variance^2
348        let variance_sq = variance * variance;
349
350        // Still need to compute fourth_power_sum
351        let fourth_power_sum = if len < PARALLEL_THRESHOLD {
352            let mut sum = 0.0;
353            for x in data {
354                // SAFETY: to_f64() always returns Some for standard numeric types
355                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
356                let diff = val - mean;
357                let diff_sq = diff * diff;
358                sum += diff_sq * diff_sq;
359            }
360            sum
361        } else {
362            data.par_iter()
363                .map(|x| {
364                    // SAFETY: to_f64() always returns Some for standard numeric types
365                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
366                    let diff = val - mean;
367                    let diff_sq = diff * diff;
368                    diff_sq * diff_sq
369                })
370                .sum()
371        };
372
373        (variance_sq, fourth_power_sum)
374    } else {
375        // Compute both variance_sum and fourth_power_sum
376        let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
377            let mut variance_sum = 0.0;
378            let mut fourth_power_sum = 0.0;
379
380            for x in data {
381                // SAFETY: to_f64() always returns Some for standard numeric types
382                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
383                let diff = val - mean;
384                let diff_sq = diff * diff;
385                variance_sum += diff_sq;
386                fourth_power_sum += diff_sq * diff_sq;
387            }
388
389            (variance_sum, fourth_power_sum)
390        } else {
391            // Single pass computing both sums simultaneously
392            data.par_iter()
393                .fold(
394                    || (0.0_f64, 0.0_f64),
395                    |acc, x| {
396                        // SAFETY: to_f64() always returns Some for standard numeric types
397                        let val = unsafe { x.0.to_f64().unwrap_unchecked() };
398                        let diff = val - mean;
399                        let diff_sq = diff * diff;
400                        (acc.0 + diff_sq, acc.1 + diff_sq * diff_sq)
401                    },
402                )
403                .reduce(|| (0.0, 0.0), |a, b| (a.0 + b.0, a.1 + b.1))
404        };
405
406        let variance = variance_sum / len as f64;
407
408        // If variance is zero, all values are the same, kurtosis is undefined
409        if variance == 0.0 {
410            return None;
411        }
412
413        let variance_sq = variance * variance;
414        (variance_sq, fourth_power_sum)
415    };
416
417    // If variance_sq is zero, all values are the same, kurtosis is undefined
418    if variance_sq == 0.0 {
419        return None;
420    }
421
422    let n = len as f64;
423
424    // Sample excess kurtosis formula:
425    // kurtosis = (n(n+1) * Σ((x_i - mean)⁴)) / ((n-1)(n-2)(n-3) * variance²) - 3(n-1)²/((n-2)(n-3))
426    let kurtosis = (n * (n + 1.0) * fourth_power_sum)
427        / ((n - 1.0) * (n - 2.0) * (n - 3.0) * variance_sq)
428        - 3.0 * (n - 1.0) * (n - 1.0) / ((n - 2.0) * (n - 3.0));
429
430    Some(kurtosis)
431}
432
433fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
434where
435    T: PartialOrd + ToPrimitive,
436    V: PartialOrd + ToPrimitive,
437{
438    let len = data.len();
439
440    if len == 0 {
441        return None;
442    }
443
444    let value_f64 = value.to_f64()?;
445
446    // Binary search to find the position where value would be inserted
447    // This gives us the number of values <= value
448    let count_leq = data.binary_search_by(|x| {
449        x.0.to_f64()
450            .unwrap_or(f64::NAN)
451            .partial_cmp(&value_f64)
452            .unwrap_or(std::cmp::Ordering::Less)
453    });
454
455    let count = match count_leq {
456        Ok(idx) => {
457            // Value found at idx, but we need to count all equal values
458            let mut count = idx + 1;
459            // Count all equal values after idx
460            for x in data.iter().skip(idx + 1) {
461                if let Some(x_val) = x.0.to_f64() {
462                    if (x_val - value_f64).abs() < 1e-9 {
463                        count += 1;
464                    } else {
465                        break;
466                    }
467                } else {
468                    break;
469                }
470            }
471            count
472        }
473        Err(idx) => idx, // Number of values less than value
474    };
475
476    // Percentile rank = (count / n) * 100
477    Some((count as f64 / len as f64) * 100.0)
478}
479
480fn atkinson_on_sorted<T>(
481    data: &[Partial<T>],
482    epsilon: f64,
483    precalc_mean: Option<f64>,
484    precalc_geometric_sum: Option<f64>,
485) -> Option<f64>
486where
487    T: Sync + PartialOrd + ToPrimitive,
488{
489    let len = data.len();
490
491    // Early return for empty data
492    if len == 0 {
493        return None;
494    }
495
496    // Single element case: perfect equality, Atkinson = 0
497    if len == 1 {
498        return Some(0.0);
499    }
500
501    // Epsilon must be non-negative
502    if epsilon < 0.0 {
503        return None;
504    }
505
506    // Use pre-calculated mean if provided, otherwise compute it
507    let mean = if let Some(precalc) = precalc_mean {
508        precalc
509    } else {
510        let sum: f64 = if len < PARALLEL_THRESHOLD {
511            let mut sum = 0.0;
512            for x in data {
513                // SAFETY: to_f64() always returns Some for standard numeric types (f32/f64, i/u 8-64)
514                sum += unsafe { x.0.to_f64().unwrap_unchecked() };
515            }
516            sum
517        } else {
518            data.par_iter()
519                // SAFETY: to_f64() always returns Some for standard numeric types
520                .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
521                .sum()
522        };
523        sum / len as f64
524    };
525
526    // If mean is zero, Atkinson is undefined
527    if mean == 0.0 {
528        return None;
529    }
530
531    // Handle special case: epsilon = 1 (uses geometric mean)
532    if (epsilon - 1.0).abs() < 1e-10 {
533        // A_1 = 1 - (geometric_mean / mean)
534        let geometric_sum: f64 = if let Some(precalc) = precalc_geometric_sum {
535            precalc
536        } else if len < PARALLEL_THRESHOLD {
537            let mut sum = 0.0;
538            for x in data {
539                // SAFETY: to_f64() always returns Some for standard numeric types
540                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
541                if val <= 0.0 {
542                    // Geometric mean undefined for non-positive values
543                    return None;
544                }
545                sum += val.ln();
546            }
547            sum
548        } else {
549            data.par_iter()
550                .map(|x| {
551                    // SAFETY: to_f64() always returns Some for standard numeric types
552                    let val = unsafe { x.0.to_f64().unwrap_unchecked() };
553                    if val <= 0.0 {
554                        return f64::NAN;
555                    }
556                    val.ln()
557                })
558                .sum()
559        };
560
561        if geometric_sum.is_nan() {
562            return None;
563        }
564
565        let geometric_mean = (geometric_sum / len as f64).exp();
566        return Some(1.0 - geometric_mean / mean);
567    }
568
569    // General case: epsilon != 1
570    // A_ε = 1 - (1/n * Σ((x_i/mean)^(1-ε)))^(1/(1-ε))
571    let exponent = 1.0 - epsilon;
572
573    let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
574        let mut sum = 0.0;
575        for x in data {
576            // SAFETY: to_f64() always returns Some for standard numeric types
577            let val = unsafe { x.0.to_f64().unwrap_unchecked() };
578            if val < 0.0 {
579                // Negative values with non-integer exponent are undefined
580                return None;
581            }
582            let ratio = val / mean;
583            sum += ratio.powf(exponent);
584        }
585        sum
586    } else {
587        data.par_iter()
588            .map(|x| {
589                // SAFETY: to_f64() always returns Some for standard numeric types
590                let val = unsafe { x.0.to_f64().unwrap_unchecked() };
591                if val < 0.0 {
592                    return f64::NAN;
593                }
594                let ratio = val / mean;
595                ratio.powf(exponent)
596            })
597            .sum()
598    };
599
600    if sum_powered.is_nan() || sum_powered <= 0.0 {
601        return None;
602    }
603
604    let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
605    Some(atkinson)
606}
607
608/// Selection algorithm to find the k-th smallest element in O(n) average time.
609/// This is an implementation of quickselect algorithm.
610fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
611where
612    T: PartialOrd,
613{
614    if data.is_empty() || k >= data.len() {
615        return None;
616    }
617
618    let mut left = 0;
619    let mut right = data.len() - 1;
620
621    loop {
622        if left == right {
623            return Some(&data[left].0);
624        }
625
626        // Use median-of-three pivot selection for better performance
627        let pivot_idx = median_of_three_pivot(data, left, right);
628        let pivot_idx = partition(data, left, right, pivot_idx);
629
630        match k.cmp(&pivot_idx) {
631            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
632            std::cmp::Ordering::Less => right = pivot_idx - 1,
633            std::cmp::Ordering::Greater => left = pivot_idx + 1,
634        }
635    }
636}
637
638/// Zero-copy selection algorithm that works with indices instead of copying data.
639/// This avoids the overhead of cloning data elements.
640fn quickselect_by_index<'a, T>(
641    data: &'a [Partial<T>],
642    indices: &mut [usize],
643    k: usize,
644) -> Option<&'a T>
645where
646    T: PartialOrd,
647{
648    if data.is_empty() || indices.is_empty() || k >= indices.len() {
649        return None;
650    }
651
652    let mut left = 0;
653    let mut right = indices.len() - 1;
654
655    loop {
656        if left == right {
657            return Some(&data[indices[left]].0);
658        }
659
660        // Use median-of-three pivot selection for better performance
661        let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
662        let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
663
664        match k.cmp(&pivot_idx) {
665            std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
666            std::cmp::Ordering::Less => right = pivot_idx - 1,
667            std::cmp::Ordering::Greater => left = pivot_idx + 1,
668        }
669    }
670}
671
672/// Select the median of three elements as pivot for better quickselect performance
673fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
674where
675    T: PartialOrd,
676{
677    let mid = left + (right - left) / 2;
678
679    if data[left] <= data[mid] {
680        if data[mid] <= data[right] {
681            mid
682        } else if data[left] <= data[right] {
683            right
684        } else {
685            left
686        }
687    } else if data[left] <= data[right] {
688        left
689    } else if data[mid] <= data[right] {
690        right
691    } else {
692        mid
693    }
694}
695
696/// Select the median of three elements as pivot using indices
697fn median_of_three_pivot_by_index<T>(
698    data: &[Partial<T>],
699    indices: &[usize],
700    left: usize,
701    right: usize,
702) -> usize
703where
704    T: PartialOrd,
705{
706    let mid = left + (right - left) / 2;
707
708    if data[indices[left]] <= data[indices[mid]] {
709        if data[indices[mid]] <= data[indices[right]] {
710            mid
711        } else if data[indices[left]] <= data[indices[right]] {
712            right
713        } else {
714            left
715        }
716    } else if data[indices[left]] <= data[indices[right]] {
717        left
718    } else if data[indices[mid]] <= data[indices[right]] {
719        right
720    } else {
721        mid
722    }
723}
724
725/// Partition function for quickselect
726fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
727where
728    T: PartialOrd,
729{
730    // Move pivot to end
731    data.swap(pivot_idx, right);
732    let mut store_idx = left;
733
734    // Move all elements smaller than pivot to the left
735    // Cache pivot position for better cache locality (access data[right] directly each time)
736    for i in left..right {
737        // Safety: i, store_idx, and right are guaranteed to be in bounds
738        // Compare directly with pivot at data[right] - compiler should optimize this access
739        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
740            data.swap(i, store_idx);
741            store_idx += 1;
742        }
743    }
744
745    // Move pivot to its final place
746    data.swap(store_idx, right);
747    store_idx
748}
749
750/// Partition function for quickselect using indices
751fn partition_by_index<T>(
752    data: &[Partial<T>],
753    indices: &mut [usize],
754    left: usize,
755    right: usize,
756    pivot_idx: usize,
757) -> usize
758where
759    T: PartialOrd,
760{
761    // Move pivot to end
762    indices.swap(pivot_idx, right);
763    let mut store_idx = left;
764
765    // Cache pivot index and value for better cache locality
766    // This reduces indirection: indices[right] -> data[indices[right]]
767    // Safety: right is guaranteed to be in bounds
768    let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
769    let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
770
771    // Move all elements smaller than pivot to the left
772    let mut elem_idx: usize;
773    for i in left..right {
774        // Safety: i and store_idx are guaranteed to be in bounds
775        elem_idx = unsafe { *indices.get_unchecked(i) };
776        if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
777            indices.swap(i, store_idx);
778            store_idx += 1;
779        }
780    }
781
782    // Move pivot to its final place
783    indices.swap(store_idx, right);
784    store_idx
785}
786
787// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
788// It divides data into quarters based on the length n = 4k + r where r is remainder.
789// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
790fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
791where
792    T: PartialOrd + ToPrimitive,
793{
794    let len = data.len();
795
796    // Early return for small arrays
797    match len {
798        0..=2 => return None,
799        3 => {
800            return Some(
801                // SAFETY: We know these indices are valid because len == 3
802                unsafe {
803                    (
804                        data.get_unchecked(0).0.to_f64()?,
805                        data.get_unchecked(1).0.to_f64()?,
806                        data.get_unchecked(2).0.to_f64()?,
807                    )
808                },
809            );
810        }
811        _ => {}
812    }
813
814    // Calculate k and remainder in one division
815    let k = len / 4;
816    let remainder = len % 4;
817
818    // SAFETY: All index calculations below are guaranteed to be in bounds
819    // because we've verified len >= 4 above and k is len/4
820    unsafe {
821        Some(match remainder {
822            0 => {
823                // Let data = {x_i}_{i=0..4k} where k is positive integer.
824                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
825                // If we divide data into two parts {x_i < q2} as L and
826                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
827                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
828                // =============
829                // Simply put: Length is multiple of 4 (4k)
830                // q1 = (x_{k-1} + x_k) / 2
831                // q2 = (x_{2k-1} + x_{2k}) / 2
832                // q3 = (x_{3k-1} + x_{3k}) / 2
833                let q1 = f64::midpoint(
834                    data.get_unchecked(k - 1).0.to_f64()?,
835                    data.get_unchecked(k).0.to_f64()?,
836                );
837                let q2 = f64::midpoint(
838                    data.get_unchecked(2 * k - 1).0.to_f64()?,
839                    data.get_unchecked(2 * k).0.to_f64()?,
840                );
841                let q3 = f64::midpoint(
842                    data.get_unchecked(3 * k - 1).0.to_f64()?,
843                    data.get_unchecked(3 * k).0.to_f64()?,
844                );
845                (q1, q2, q3)
846            }
847            1 => {
848                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
849                // Median q2 = x_{2k}.
850                // If we divide data other than q2 into two parts {x_i < q2}
851                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
852                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
853                // =============
854                // Simply put: Length is 4k + 1
855                // q1 = (x_{k-1} + x_k) / 2
856                // q2 = x_{2k}
857                // q3 = (x_{3k} + x_{3k+1}) / 2
858                let q1 = f64::midpoint(
859                    data.get_unchecked(k - 1).0.to_f64()?,
860                    data.get_unchecked(k).0.to_f64()?,
861                );
862                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
863                let q3 = f64::midpoint(
864                    data.get_unchecked(3 * k).0.to_f64()?,
865                    data.get_unchecked(3 * k + 1).0.to_f64()?,
866                );
867                (q1, q2, q3)
868            }
869            2 => {
870                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
871                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
872                // If we divide data into two parts {x_i < q2} as L and
873                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
874                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
875                // =============
876                // Simply put: Length is 4k + 2
877                // q1 = x_k
878                // q2 = (x_{2k} + x_{2k+1}) / 2
879                // q3 = x_{3k+1}
880                let q1 = data.get_unchecked(k).0.to_f64()?;
881                let q2 = f64::midpoint(
882                    data.get_unchecked(2 * k).0.to_f64()?,
883                    data.get_unchecked(2 * k + 1).0.to_f64()?,
884                );
885                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
886                (q1, q2, q3)
887            }
888            _ => {
889                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
890                // Median q2 = x_{2k+1}.
891                // If we divide data other than q2 into two parts {x_i < q2}
892                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
893                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
894                // =============
895                // Simply put: Length is 4k + 3
896                // q1 = x_k
897                // q2 = x_{2k+1}
898                // q3 = x_{3k+2}
899                let q1 = data.get_unchecked(k).0.to_f64()?;
900                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
901                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
902                (q1, q2, q3)
903            }
904        })
905    }
906}
907
908/// Compute quartiles using selection algorithm in O(n) time instead of O(n log n) sorting.
909/// This implementation follows Method 3 from `<https://en.wikipedia.org/wiki/Quartile>`
910fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
911where
912    T: PartialOrd + ToPrimitive,
913{
914    let len = data.len();
915
916    // Early return for small arrays
917    match len {
918        0..=2 => return None,
919        3 => {
920            // For 3 elements, we need to find the sorted order using selection
921            let min_val = quickselect(data, 0)?.to_f64()?;
922            let med_val = quickselect(data, 1)?.to_f64()?;
923            let max_val = quickselect(data, 2)?.to_f64()?;
924            return Some((min_val, med_val, max_val));
925        }
926        _ => {}
927    }
928
929    // Calculate k and remainder in one division
930    let k = len / 4;
931    let remainder = len % 4;
932
933    // Use selection algorithm to find the required order statistics
934    match remainder {
935        0 => {
936            // Length is multiple of 4 (4k)
937            // Q1 = (x_{k-1} + x_k) / 2
938            // Q2 = (x_{2k-1} + x_{2k}) / 2
939            // Q3 = (x_{3k-1} + x_{3k}) / 2
940            let q1_low = quickselect(data, k - 1)?.to_f64()?;
941            let q1_high = quickselect(data, k)?.to_f64()?;
942            let q1 = f64::midpoint(q1_low, q1_high);
943
944            let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
945            let q2_high = quickselect(data, 2 * k)?.to_f64()?;
946            let q2 = f64::midpoint(q2_low, q2_high);
947
948            let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
949            let q3_high = quickselect(data, 3 * k)?.to_f64()?;
950            let q3 = f64::midpoint(q3_low, q3_high);
951
952            Some((q1, q2, q3))
953        }
954        1 => {
955            // Length is 4k + 1
956            // Q1 = (x_{k-1} + x_k) / 2
957            // Q2 = x_{2k}
958            // Q3 = (x_{3k} + x_{3k+1}) / 2
959            let q1_low = quickselect(data, k - 1)?.to_f64()?;
960            let q1_high = quickselect(data, k)?.to_f64()?;
961            let q1 = f64::midpoint(q1_low, q1_high);
962
963            let q2 = quickselect(data, 2 * k)?.to_f64()?;
964
965            let q3_low = quickselect(data, 3 * k)?.to_f64()?;
966            let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
967            let q3 = f64::midpoint(q3_low, q3_high);
968
969            Some((q1, q2, q3))
970        }
971        2 => {
972            // Length is 4k + 2
973            // Q1 = x_k
974            // Q2 = (x_{2k} + x_{2k+1}) / 2
975            // Q3 = x_{3k+1}
976            let q1 = quickselect(data, k)?.to_f64()?;
977
978            let q2_low = quickselect(data, 2 * k)?.to_f64()?;
979            let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
980            let q2 = f64::midpoint(q2_low, q2_high);
981
982            let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
983
984            Some((q1, q2, q3))
985        }
986        _ => {
987            // Length is 4k + 3
988            // Q1 = x_k
989            // Q2 = x_{2k+1}
990            // Q3 = x_{3k+2}
991            let q1 = quickselect(data, k)?.to_f64()?;
992            let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
993            let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
994
995            Some((q1, q2, q3))
996        }
997    }
998}
999
1000/// Zero-copy quartiles computation using index-based selection.
1001/// This avoids copying data by working with an array of indices.
1002fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
1003where
1004    T: PartialOrd + ToPrimitive,
1005{
1006    let len = data.len();
1007
1008    // Early return for small arrays
1009    match len {
1010        0..=2 => return None,
1011        3 => {
1012            // For 3 elements, create indices and find sorted order
1013            let mut indices = vec![0, 1, 2];
1014            let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
1015            let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
1016            let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
1017            return Some((min_val, med_val, max_val));
1018        }
1019        _ => {}
1020    }
1021
1022    // Calculate k and remainder in one division
1023    let k = len / 4;
1024    let remainder = len % 4;
1025
1026    // Allocate indices array once and reuse it by resetting after each call
1027    let indices_template: Vec<usize> = (0..len).collect();
1028    #[allow(unused)]
1029    let mut indices: Vec<usize> = Vec::with_capacity(len);
1030
1031    // Use zero-copy selection algorithm to find the required order statistics
1032    // Note: Each quickselect_by_index call mutates the indices array, so we need
1033    // to reset it for each call to ensure correctness.
1034    match remainder {
1035        0 => {
1036            // Length is multiple of 4 (4k)
1037            indices.clone_from(&indices_template);
1038            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
1039            indices.clone_from(&indices_template);
1040            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1041            let q1 = f64::midpoint(q1_low, q1_high);
1042
1043            indices.clone_from(&indices_template);
1044            let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
1045            indices.clone_from(&indices_template);
1046            let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1047            let q2 = f64::midpoint(q2_low, q2_high);
1048
1049            indices.clone_from(&indices_template);
1050            let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
1051            indices.clone_from(&indices_template);
1052            let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
1053            let q3 = f64::midpoint(q3_low, q3_high);
1054
1055            Some((q1, q2, q3))
1056        }
1057        1 => {
1058            // Length is 4k + 1
1059            indices.clone_from(&indices_template);
1060            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
1061            indices.clone_from(&indices_template);
1062            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1063            let q1 = f64::midpoint(q1_low, q1_high);
1064
1065            indices.clone_from(&indices_template);
1066            let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1067
1068            indices.clone_from(&indices_template);
1069            let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
1070            indices.clone_from(&indices_template);
1071            let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1072            let q3 = f64::midpoint(q3_low, q3_high);
1073
1074            Some((q1, q2, q3))
1075        }
1076        2 => {
1077            // Length is 4k + 2
1078            indices.clone_from(&indices_template);
1079            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1080
1081            indices.clone_from(&indices_template);
1082            let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1083            indices.clone_from(&indices_template);
1084            let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1085            let q2 = f64::midpoint(q2_low, q2_high);
1086
1087            indices.clone_from(&indices_template);
1088            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1089
1090            Some((q1, q2, q3))
1091        }
1092        _ => {
1093            // Length is 4k + 3
1094            indices.clone_from(&indices_template);
1095            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1096            indices.clone_from(&indices_template);
1097            let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1098            indices.clone_from(&indices_template);
1099            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
1100
1101            Some((q1, q2, q3))
1102        }
1103    }
1104}
1105
1106fn mode_on_sorted<T, I>(it: I) -> Option<T>
1107where
1108    T: PartialOrd,
1109    I: Iterator<Item = T>,
1110{
1111    use std::cmp::Ordering;
1112
1113    // This approach to computing the mode works very nicely when the
1114    // number of samples is large and is close to its cardinality.
1115    // In other cases, a hashmap would be much better.
1116    // But really, how can we know this when given an arbitrary stream?
1117    // Might just switch to a hashmap to track frequencies. That would also
1118    // be generally useful for discovering the cardinality of a sample.
1119    let (mut mode, mut next) = (None, None);
1120    let (mut mode_count, mut next_count) = (0usize, 0usize);
1121    for x in it {
1122        if mode.as_ref() == Some(&x) {
1123            mode_count += 1;
1124        } else if next.as_ref() == Some(&x) {
1125            next_count += 1;
1126        } else {
1127            next = Some(x);
1128            next_count = 0;
1129        }
1130
1131        match next_count.cmp(&mode_count) {
1132            Ordering::Greater => {
1133                mode = next;
1134                mode_count = next_count;
1135                next = None;
1136                next_count = 0;
1137            }
1138            Ordering::Equal => {
1139                mode = None;
1140                mode_count = 0;
1141            }
1142            Ordering::Less => {}
1143        }
1144    }
1145    mode
1146}
1147
1148/// Computes both modes and antimodes from a sorted slice of values.
1149/// This version works with references to avoid unnecessary cloning.
1150///
1151/// # Arguments
1152///
1153/// * `data` - A sorted slice of values
1154///
1155/// # Notes
1156///
1157/// - Mode is the most frequently occurring value(s)
1158/// - Antimode is the least frequently occurring value(s)
1159/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
1160/// - For empty slices, returns empty vectors and zero counts
1161/// - For single value slices, returns that value as the mode and empty antimode
1162/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
1163///
1164/// # Returns
1165///
1166/// A tuple containing:
1167/// * Modes information: `(Vec<T>, usize, u32)` where:
1168///   - Vec<T>: Vector containing the mode values
1169///   - usize: Number of modes found
1170///   - u32: Frequency/count of the mode values
1171/// * Antimodes information: `(Vec<T>, usize, u32)` where:
1172///   - Vec<T>: Vector containing up to 10 antimode values
1173///   - usize: Total number of antimodes
1174///   - u32: Frequency/count of the antimode values
1175#[allow(clippy::type_complexity)]
1176#[inline]
1177fn modes_and_antimodes_on_sorted_slice<T>(
1178    data: &[Partial<T>],
1179) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1180where
1181    T: PartialOrd + Clone,
1182{
1183    let size = data.len();
1184
1185    // Early return for empty slice
1186    if size == 0 {
1187        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1188    }
1189
1190    // Estimate capacity using integer square root of size
1191    // Integer square root using Newton's method (faster than floating point sqrt)
1192    let mut x = size;
1193    let mut y = x.div_ceil(2);
1194    while y < x {
1195        x = y;
1196        y = usize::midpoint(x, size / x);
1197    }
1198    let sqrt_size = x;
1199    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1200
1201    let mut current_value = &data[0].0;
1202    let mut current_count = 1;
1203    let mut highest_count = 1;
1204    let mut lowest_count = u32::MAX;
1205
1206    // Count consecutive runs - optimized to reduce allocations
1207    for x in data.iter().skip(1) {
1208        if x.0 == *current_value {
1209            current_count += 1;
1210            highest_count = highest_count.max(current_count);
1211        } else {
1212            runs.push((current_value, current_count));
1213            lowest_count = lowest_count.min(current_count);
1214            current_value = &x.0;
1215            current_count = 1;
1216        }
1217    }
1218    runs.push((current_value, current_count));
1219    lowest_count = lowest_count.min(current_count);
1220
1221    // Early return if only one unique value
1222    if runs.len() == 1 {
1223        let (val, count) = runs.pop().unwrap();
1224        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1225    }
1226
1227    // Special case: if all values appear exactly once
1228    if highest_count == 1 {
1229        let antimodes_count = runs.len().min(10);
1230        let total_count = runs.len();
1231        let mut antimodes = Vec::with_capacity(antimodes_count);
1232        for (val, _) in runs.into_iter().take(antimodes_count) {
1233            antimodes.push(val.clone());
1234        }
1235        // For modes: empty, count 0, occurrences 0 (not 1, 1)
1236        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1237    }
1238
1239    // Estimate capacities based on the number of runs
1240    // For modes: typically 1-3 modes, rarely more than 10% of runs
1241    // For antimodes: we only collect up to 10, but need to count all
1242    let estimated_modes = (runs.len() / 10).clamp(1, 10);
1243    let estimated_antimodes = 10.min(runs.len());
1244
1245    // Collect indices first to avoid unnecessary cloning
1246    let mut modes_indices = Vec::with_capacity(estimated_modes);
1247    let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
1248    let mut mode_count = 0;
1249    let mut antimodes_count = 0;
1250    let mut antimodes_collected = 0_u32;
1251
1252    // Count and collect mode/antimode indices simultaneously
1253    for (idx, (_, count)) in runs.iter().enumerate() {
1254        if *count == highest_count {
1255            modes_indices.push(idx);
1256            mode_count += 1;
1257        }
1258        if *count == lowest_count {
1259            antimodes_count += 1;
1260            if antimodes_collected < 10 {
1261                antimodes_indices.push(idx);
1262                antimodes_collected += 1;
1263            }
1264        }
1265    }
1266
1267    // Extract values only for the indices we need, cloning only at the end
1268    let modes_result: Vec<T> = modes_indices
1269        .into_iter()
1270        .map(|idx| runs[idx].0.clone())
1271        .collect();
1272    let antimodes_result: Vec<T> = antimodes_indices
1273        .into_iter()
1274        .map(|idx| runs[idx].0.clone())
1275        .collect();
1276
1277    (
1278        (modes_result, mode_count, highest_count),
1279        (antimodes_result, antimodes_count, lowest_count),
1280    )
1281}
1282
1283/// A commutative data structure for lazily sorted sequences of data.
1284///
1285/// The sort does not occur until statistics need to be computed.
1286///
1287/// Note that this works on types that do not define a total ordering like
1288/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
1289/// is returned.
1290#[allow(clippy::unsafe_derive_deserialize)]
1291#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
1292pub struct Unsorted<T> {
1293    sorted: bool,
1294    data: Vec<Partial<T>>,
1295}
1296
1297impl<T: PartialOrd> Unsorted<T> {
1298    /// Create initial empty state.
1299    #[inline]
1300    #[must_use]
1301    pub fn new() -> Unsorted<T> {
1302        Default::default()
1303    }
1304
1305    /// Add a new element to the set.
1306    #[allow(clippy::inline_always)]
1307    #[inline(always)]
1308    pub fn add(&mut self, v: T) {
1309        self.sorted = false;
1310        self.data.push(Partial(v));
1311    }
1312
1313    /// Return the number of data points.
1314    #[inline]
1315    #[must_use]
1316    pub const fn len(&self) -> usize {
1317        self.data.len()
1318    }
1319
1320    #[inline]
1321    #[must_use]
1322    pub const fn is_empty(&self) -> bool {
1323        self.data.is_empty()
1324    }
1325
1326    #[inline]
1327    fn sort(&mut self) {
1328        if !self.sorted {
1329            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
1330            if self.data.len() < PARALLEL_THRESHOLD {
1331                self.data.sort_unstable();
1332            } else {
1333                self.data.par_sort_unstable();
1334            }
1335            self.sorted = true;
1336        }
1337    }
1338
1339    #[inline]
1340    const fn already_sorted(&mut self) {
1341        self.sorted = true;
1342    }
1343
1344    /// Add multiple elements efficiently
1345    #[inline]
1346    pub fn add_bulk(&mut self, values: Vec<T>) {
1347        self.sorted = false;
1348        self.data.reserve(values.len());
1349        self.data.extend(values.into_iter().map(Partial));
1350    }
1351
1352    /// Shrink capacity to fit current data
1353    #[inline]
1354    pub fn shrink_to_fit(&mut self) {
1355        self.data.shrink_to_fit();
1356    }
1357
1358    /// Create with specific capacity
1359    #[inline]
1360    #[must_use]
1361    pub fn with_capacity(capacity: usize) -> Self {
1362        Unsorted {
1363            sorted: true,
1364            data: Vec::with_capacity(capacity),
1365        }
1366    }
1367
1368    /// Add a value assuming it's greater than all existing values
1369    #[inline]
1370    pub fn push_ascending(&mut self, value: T) {
1371        if let Some(last) = self.data.last() {
1372            debug_assert!(last.0 <= value, "Value must be >= than last element");
1373        }
1374        self.data.push(Partial(value));
1375        // Data remains sorted
1376    }
1377}
1378
1379impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
1380    #[inline]
1381    /// Returns the cardinality of the data.
1382    /// Set `sorted` to `true` if the data is already sorted.
1383    /// Set `parallel_threshold` to `0` to force sequential processing.
1384    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
1385    /// Set `parallel_threshold` to `2` to force parallel processing.
1386    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
1387    /// greater than the default threshold of `10_000`.
1388    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1389        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
1390        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
1391
1392        let len = self.data.len();
1393        match len {
1394            0 => return 0,
1395            1 => return 1,
1396            _ => {}
1397        }
1398
1399        if sorted {
1400            self.already_sorted();
1401        } else {
1402            self.sort();
1403        }
1404
1405        let use_parallel = parallel_threshold != 0
1406            && (parallel_threshold == 1
1407                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1408
1409        if use_parallel {
1410            // Parallel processing using chunks
1411            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
1412            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1413            let chunk_info: ChunkInfo<'_, T> = self
1414                .data
1415                .par_chunks(CHUNK_SIZE)
1416                .map(|chunk| {
1417                    // Count unique elements within this chunk
1418                    let mut count = u64::from(!chunk.is_empty());
1419                    for window in chunk.windows(2) {
1420                        // safety: windows(2) guarantees window has length 2
1421                        if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1422                            count += 1;
1423                        }
1424                    }
1425                    (count, chunk.first(), chunk.last())
1426                })
1427                .collect();
1428
1429            // Combine results, checking boundaries between chunks
1430            let mut total = 0;
1431            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1432                total += count;
1433
1434                // Check boundary with previous chunk
1435                if i > 0
1436                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1437                    && prev_last == curr_first
1438                {
1439                    total -= 1; // Deduct 1 if boundary values are equal
1440                }
1441            }
1442
1443            total
1444        } else {
1445            // Sequential processing
1446
1447            // the statement below is equivalent to:
1448            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1449            let mut count = u64::from(!self.data.is_empty());
1450
1451            for window in self.data.windows(2) {
1452                // safety: windows(2) guarantees window has length 2
1453                if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1454                    count += 1;
1455                }
1456            }
1457            count
1458        }
1459    }
1460}
1461
1462impl<T: PartialOrd + Clone> Unsorted<T> {
1463    /// Returns the mode of the data.
1464    #[inline]
1465    pub fn mode(&mut self) -> Option<T> {
1466        if self.data.is_empty() {
1467            return None;
1468        }
1469        self.sort();
1470        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1471    }
1472
1473    /// Returns the modes of the data.
1474    /// Note that there is also a `frequency::mode()` function that return one mode
1475    /// with the highest frequency. If there is a tie, it returns None.
1476    #[inline]
1477    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1478        if self.data.is_empty() {
1479            return (Vec::new(), 0, 0);
1480        }
1481        self.sort();
1482        modes_and_antimodes_on_sorted_slice(&self.data).0
1483    }
1484
1485    /// Returns the antimodes of the data.
1486    /// `antimodes_result` only returns the first 10 antimodes
1487    #[inline]
1488    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1489        if self.data.is_empty() {
1490            return (Vec::new(), 0, 0);
1491        }
1492        self.sort();
1493        modes_and_antimodes_on_sorted_slice(&self.data).1
1494    }
1495
1496    /// Returns the modes and antimodes of the data.
1497    /// `antimodes_result` only returns the first 10 antimodes
1498    #[allow(clippy::type_complexity)]
1499    #[inline]
1500    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1501        if self.data.is_empty() {
1502            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1503        }
1504        self.sort();
1505        modes_and_antimodes_on_sorted_slice(&self.data)
1506    }
1507}
1508
1509impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1510    /// Returns the median of the data.
1511    #[inline]
1512    pub fn median(&mut self) -> Option<f64> {
1513        if self.data.is_empty() {
1514            return None;
1515        }
1516        self.sort();
1517        median_on_sorted(&self.data)
1518    }
1519}
1520
1521impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1522    /// Returns the Median Absolute Deviation (MAD) of the data.
1523    #[inline]
1524    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1525        if self.data.is_empty() {
1526            return None;
1527        }
1528        if existing_median.is_none() {
1529            self.sort();
1530        }
1531        mad_on_sorted(&self.data, existing_median)
1532    }
1533}
1534
1535impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1536    /// Returns the quartiles of the data using the traditional sorting approach.
1537    ///
1538    /// This method sorts the data first and then computes quartiles.
1539    /// Time complexity: O(n log n)
1540    #[inline]
1541    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1542        if self.data.is_empty() {
1543            return None;
1544        }
1545        self.sort();
1546        quartiles_on_sorted(&self.data)
1547    }
1548}
1549
1550impl<T: PartialOrd + ToPrimitive + Sync> Unsorted<T> {
1551    /// Returns the Gini Coefficient of the data.
1552    ///
1553    /// The Gini Coefficient measures inequality in a distribution, ranging from 0 (perfect equality)
1554    /// to 1 (perfect inequality). This method sorts the data first and then computes the Gini coefficient.
1555    /// Time complexity: O(n log n)
1556    #[inline]
1557    pub fn gini(&mut self, precalc_sum: Option<f64>) -> Option<f64> {
1558        if self.data.is_empty() {
1559            return None;
1560        }
1561        self.sort();
1562        gini_on_sorted(&self.data, precalc_sum)
1563    }
1564
1565    /// Returns the kurtosis (excess kurtosis) of the data.
1566    ///
1567    /// Kurtosis measures the "tailedness" of a distribution. Excess kurtosis is kurtosis - 3,
1568    /// where 0 indicates a normal distribution, positive values indicate heavy tails, and
1569    /// negative values indicate light tails. This method sorts the data first and then computes kurtosis.
1570    /// Time complexity: O(n log n)
1571    #[inline]
1572    pub fn kurtosis(
1573        &mut self,
1574        precalc_mean: Option<f64>,
1575        precalc_variance: Option<f64>,
1576    ) -> Option<f64> {
1577        if self.data.is_empty() {
1578            return None;
1579        }
1580        self.sort();
1581        kurtosis_on_sorted(&self.data, precalc_mean, precalc_variance)
1582    }
1583
1584    /// Returns the percentile rank of a value in the data.
1585    ///
1586    /// Returns the percentile rank (0-100) of the given value. If the value is less than all
1587    /// values, returns 0.0. If greater than all, returns 100.0.
1588    /// This method sorts the data first and then computes the percentile rank.
1589    /// Time complexity: O(n log n)
1590    #[inline]
1591    pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1592    where
1593        V: PartialOrd + ToPrimitive,
1594    {
1595        if self.data.is_empty() {
1596            return None;
1597        }
1598        self.sort();
1599        percentile_rank_on_sorted(&self.data, &value)
1600    }
1601
1602    /// Returns the Atkinson Index of the data.
1603    ///
1604    /// The Atkinson Index measures inequality with an inequality aversion parameter ε.
1605    /// It ranges from 0 (perfect equality) to 1 (perfect inequality).
1606    /// Higher ε values give more weight to inequality at the lower end of the distribution.
1607    /// This method sorts the data first and then computes the Atkinson index.
1608    /// Time complexity: O(n log n)
1609    ///
1610    /// # Arguments
1611    /// * `epsilon` - Inequality aversion parameter (must be >= 0). Common values:
1612    ///   - 0.0: No inequality aversion (returns 0)
1613    ///   - 0.5: Moderate aversion
1614    ///   - 1.0: Uses geometric mean (special case)
1615    ///   - 2.0: High aversion
1616    /// * `precalc_mean` - Optional pre-calculated mean
1617    /// * `precalc_geometric_sum` - Optional pre-calculated geometric sum (sum of ln(val)), only used when epsilon = 1
1618    #[inline]
1619    pub fn atkinson(
1620        &mut self,
1621        epsilon: f64,
1622        precalc_mean: Option<f64>,
1623        precalc_geometric_sum: Option<f64>,
1624    ) -> Option<f64> {
1625        if self.data.is_empty() {
1626            return None;
1627        }
1628        self.sort();
1629        atkinson_on_sorted(&self.data, epsilon, precalc_mean, precalc_geometric_sum)
1630    }
1631}
1632
1633impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1634    /// Returns the quartiles of the data using selection algorithm.
1635    ///
1636    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1637    /// in O(n) average time complexity instead of O(n log n) sorting.
1638    /// Requires T to implement Clone to create a working copy of the data.
1639    ///
1640    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1641    /// is often slower than the sorting-based approach for small to medium datasets due to:
1642    /// - Need to find multiple order statistics (3 separate quickselect calls)
1643    /// - Overhead of copying data to avoid mutation
1644    /// - Rayon's highly optimized parallel sorting
1645    #[inline]
1646    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1647        if self.data.is_empty() {
1648            return None;
1649        }
1650        // If data is already sorted, use zero-copy approach to avoid cloning
1651        if self.sorted {
1652            return quartiles_with_zero_copy_selection(&self.data);
1653        }
1654        // Create a copy using collect to avoid mutating the original for selection algorithm
1655        let mut data_copy: Vec<Partial<T>> =
1656            self.data.iter().map(|x| Partial(x.0.clone())).collect();
1657        quartiles_with_selection(&mut data_copy)
1658    }
1659}
1660
1661impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1662    /// Returns the quartiles using zero-copy selection algorithm.
1663    ///
1664    /// This implementation avoids copying data by working with indices instead,
1665    /// providing better performance than the clone-based selection approach.
1666    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1667    #[inline]
1668    #[must_use]
1669    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1670        if self.data.is_empty() {
1671            return None;
1672        }
1673        quartiles_with_zero_copy_selection(&self.data)
1674    }
1675}
1676
1677impl<T: PartialOrd> Commute for Unsorted<T> {
1678    #[inline]
1679    fn merge(&mut self, mut v: Unsorted<T>) {
1680        if v.is_empty() {
1681            return;
1682        }
1683
1684        self.sorted = false;
1685        // we use std::mem::take to avoid unnecessary allocations
1686        self.data.extend(std::mem::take(&mut v.data));
1687    }
1688}
1689
1690impl<T: PartialOrd> Default for Unsorted<T> {
1691    #[inline]
1692    fn default() -> Unsorted<T> {
1693        Unsorted {
1694            data: Vec::with_capacity(256),
1695            sorted: true, // empty is sorted
1696        }
1697    }
1698}
1699
1700impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1701    #[inline]
1702    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1703        let mut v = Unsorted::new();
1704        v.extend(it);
1705        v
1706    }
1707}
1708
1709impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1710    #[inline]
1711    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1712        self.sorted = false;
1713        self.data.extend(it.into_iter().map(Partial));
1714    }
1715}
1716
1717fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1718where
1719    T: PartialOrd + Clone,
1720{
1721    let len = data.len();
1722
1723    // Early return for empty array or invalid percentiles
1724    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1725        return None;
1726    }
1727
1728    // Optimize: Check if percentiles are already sorted and unique
1729    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1730        // Single or empty percentile - no need to sort/dedup
1731        percentiles.to_vec()
1732    } else {
1733        // Check if already sorted and unique (common case)
1734        let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1735
1736        if is_sorted_unique {
1737            // Already sorted and unique, use directly without cloning
1738            percentiles.to_vec()
1739        } else {
1740            // Need to sort and dedup - use fixed-size bool array (domain is 0..=100)
1741            let mut seen = [false; 101];
1742            let mut sorted_unique = Vec::with_capacity(percentiles.len().min(101));
1743            for &p in percentiles {
1744                if !seen[p as usize] {
1745                    seen[p as usize] = true;
1746                    sorted_unique.push(p);
1747                }
1748            }
1749            sorted_unique.sort_unstable();
1750            sorted_unique
1751        }
1752    };
1753
1754    let mut results = Vec::with_capacity(unique_percentiles.len());
1755
1756    // SAFETY: All index calculations below are guaranteed to be in bounds
1757    // because we've verified len > 0 and the rank calculation ensures
1758    // the index is within bounds
1759    unsafe {
1760        for &p in &unique_percentiles {
1761            // Calculate the ordinal rank using nearest-rank method
1762            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1763            // n = ⌈(P/100) × N⌉
1764            #[allow(clippy::cast_sign_loss)]
1765            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1766
1767            // Convert to 0-based index
1768            let idx = rank.saturating_sub(1);
1769
1770            // Get the value at that rank and extract the inner value
1771            results.push(data.get_unchecked(idx).0.clone());
1772        }
1773    }
1774
1775    Some(results)
1776}
1777
1778impl<T: PartialOrd + Clone> Unsorted<T> {
1779    /// Returns the requested percentiles of the data.
1780    ///
1781    /// Uses the nearest-rank method to compute percentiles.
1782    /// Each returned value is an actual value from the dataset.
1783    ///
1784    /// # Arguments
1785    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1786    ///
1787    /// # Returns
1788    /// * `None` if the data is empty or if any percentile is > 100
1789    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1790    ///
1791    /// # Example
1792    /// ```
1793    /// use stats::Unsorted;
1794    /// let mut data = Unsorted::new();
1795    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1796    /// let percentiles = vec![25, 50, 75];
1797    /// let results = data.custom_percentiles(&percentiles).unwrap();
1798    /// assert_eq!(results, vec![3, 5, 8]);
1799    /// ```
1800    #[inline]
1801    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1802        if self.data.is_empty() {
1803            return None;
1804        }
1805        self.sort();
1806        custom_percentiles_on_sorted(&self.data, percentiles)
1807    }
1808}
1809
1810#[cfg(test)]
1811mod test {
1812    use super::*;
1813
1814    #[test]
1815    fn test_cardinality_empty() {
1816        let mut unsorted: Unsorted<i32> = Unsorted::new();
1817        assert_eq!(unsorted.cardinality(false, 1), 0);
1818    }
1819
1820    #[test]
1821    fn test_cardinality_single_element() {
1822        let mut unsorted = Unsorted::new();
1823        unsorted.add(5);
1824        assert_eq!(unsorted.cardinality(false, 1), 1);
1825    }
1826
1827    #[test]
1828    fn test_cardinality_unique_elements() {
1829        let mut unsorted = Unsorted::new();
1830        unsorted.extend(vec![1, 2, 3, 4, 5]);
1831        assert_eq!(unsorted.cardinality(false, 1), 5);
1832    }
1833
1834    #[test]
1835    fn test_cardinality_duplicate_elements() {
1836        let mut unsorted = Unsorted::new();
1837        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1838        assert_eq!(unsorted.cardinality(false, 1), 4);
1839    }
1840
1841    #[test]
1842    fn test_cardinality_all_same() {
1843        let mut unsorted = Unsorted::new();
1844        unsorted.extend(vec![1; 100]);
1845        assert_eq!(unsorted.cardinality(false, 1), 1);
1846    }
1847
1848    #[test]
1849    fn test_cardinality_large_range() {
1850        let mut unsorted = Unsorted::new();
1851        unsorted.extend(0..1_000_000);
1852        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1853    }
1854
1855    #[test]
1856    fn test_cardinality_large_range_sequential() {
1857        let mut unsorted = Unsorted::new();
1858        unsorted.extend(0..1_000_000);
1859        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1860    }
1861
1862    #[test]
1863    fn test_cardinality_presorted() {
1864        let mut unsorted = Unsorted::new();
1865        unsorted.extend(vec![1, 2, 3, 4, 5]);
1866        unsorted.sort();
1867        assert_eq!(unsorted.cardinality(true, 1), 5);
1868    }
1869
1870    #[test]
1871    fn test_cardinality_float() {
1872        let mut unsorted = Unsorted::new();
1873        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1874        assert_eq!(unsorted.cardinality(false, 1), 4);
1875    }
1876
1877    #[test]
1878    fn test_cardinality_string() {
1879        let mut unsorted = Unsorted::new();
1880        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1881        assert_eq!(unsorted.cardinality(false, 1), 3);
1882    }
1883
1884    #[test]
1885    fn test_quartiles_selection_vs_sorted() {
1886        // Test that selection-based quartiles gives same results as sorting-based
1887        let test_cases = vec![
1888            vec![3, 5, 7, 9],
1889            vec![3, 5, 7],
1890            vec![1, 2, 7, 11],
1891            vec![3, 5, 7, 9, 12],
1892            vec![2, 2, 3, 8, 10],
1893            vec![3, 5, 7, 9, 12, 20],
1894            vec![0, 2, 4, 8, 10, 11],
1895            vec![3, 5, 7, 9, 12, 20, 21],
1896            vec![1, 5, 6, 6, 7, 10, 19],
1897        ];
1898
1899        for test_case in test_cases {
1900            let mut unsorted1 = Unsorted::new();
1901            let mut unsorted2 = Unsorted::new();
1902            let mut unsorted3 = Unsorted::new();
1903            unsorted1.extend(test_case.clone());
1904            unsorted2.extend(test_case.clone());
1905            unsorted3.extend(test_case.clone());
1906
1907            let result_sorted = unsorted1.quartiles();
1908            let result_selection = unsorted2.quartiles_with_selection();
1909            let result_zero_copy = unsorted3.quartiles_zero_copy();
1910
1911            assert_eq!(
1912                result_sorted, result_selection,
1913                "Selection mismatch for test case: {:?}",
1914                test_case
1915            );
1916            assert_eq!(
1917                result_sorted, result_zero_copy,
1918                "Zero-copy mismatch for test case: {:?}",
1919                test_case
1920            );
1921        }
1922    }
1923
1924    #[test]
1925    fn test_quartiles_with_selection_small() {
1926        // Test edge cases for selection-based quartiles
1927        let mut unsorted: Unsorted<i32> = Unsorted::new();
1928        assert_eq!(unsorted.quartiles_with_selection(), None);
1929
1930        let mut unsorted = Unsorted::new();
1931        unsorted.extend(vec![1, 2]);
1932        assert_eq!(unsorted.quartiles_with_selection(), None);
1933
1934        let mut unsorted = Unsorted::new();
1935        unsorted.extend(vec![1, 2, 3]);
1936        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1937    }
1938
1939    #[test]
1940    fn test_quickselect() {
1941        let data = vec![
1942            Partial(3),
1943            Partial(1),
1944            Partial(4),
1945            Partial(1),
1946            Partial(5),
1947            Partial(9),
1948            Partial(2),
1949            Partial(6),
1950        ];
1951
1952        // Test finding different positions
1953        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1954        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1955        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1956
1957        // Test edge cases
1958        let mut empty: Vec<Partial<i32>> = vec![];
1959        assert_eq!(quickselect(&mut empty, 0), None);
1960
1961        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1962        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1963    }
1964
1965    #[test]
1966    fn median_stream() {
1967        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1968        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1969    }
1970
1971    #[test]
1972    fn mad_stream() {
1973        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1974        assert_eq!(
1975            mad(
1976                vec![
1977                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1978                    50, 61, 39, 88, 5, 13, 64
1979                ]
1980                .into_iter(),
1981                None
1982            ),
1983            Some(16.0)
1984        );
1985    }
1986
1987    #[test]
1988    fn mad_stream_precalc_median() {
1989        let data = vec![3usize, 5, 7, 9].into_iter();
1990        let median1 = median(data.clone());
1991        assert_eq!(mad(data, median1), Some(2.0));
1992
1993        let data2 = vec![
1994            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1995            39, 88, 5, 13, 64,
1996        ]
1997        .into_iter();
1998        let median2 = median(data2.clone());
1999        assert_eq!(mad(data2, median2), Some(16.0));
2000    }
2001
2002    #[test]
2003    fn mode_stream() {
2004        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
2005        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
2006        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
2007        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
2008        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
2009    }
2010
2011    #[test]
2012    fn median_floats() {
2013        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
2014        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
2015    }
2016
2017    #[test]
2018    fn mode_floats() {
2019        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
2020        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
2021        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
2022        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
2023        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
2024    }
2025
2026    #[test]
2027    fn modes_stream() {
2028        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
2029        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
2030        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
2031        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
2032        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
2033        let vec: Vec<u32> = vec![];
2034        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
2035    }
2036
2037    #[test]
2038    fn modes_floats() {
2039        assert_eq!(
2040            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2041            (vec![], 0, 0)
2042        );
2043        assert_eq!(
2044            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2045            (vec![3.0], 1, 4)
2046        );
2047        assert_eq!(
2048            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2049            (vec![3.0, 4.0], 2, 2)
2050        );
2051        assert_eq!(
2052            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2053            (vec![1.0, 3.0], 2, 2)
2054        );
2055    }
2056
2057    #[test]
2058    fn antimodes_stream() {
2059        assert_eq!(
2060            antimodes(vec![3usize, 5, 7, 9].into_iter()),
2061            (vec![3, 5, 7, 9], 4, 1)
2062        );
2063        assert_eq!(
2064            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
2065            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
2066        );
2067        assert_eq!(
2068            antimodes(vec![1usize, 3, 3, 3].into_iter()),
2069            (vec![1], 1, 1)
2070        );
2071        assert_eq!(
2072            antimodes(vec![3usize, 3, 4, 4].into_iter()),
2073            (vec![3, 4], 2, 2)
2074        );
2075        assert_eq!(
2076            antimodes(
2077                vec![
2078                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
2079                    14, 14, 15, 15
2080                ]
2081                .into_iter()
2082            ),
2083            // we only show the first 10 of the 13 antimodes
2084            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2085        );
2086        assert_eq!(
2087            antimodes(
2088                vec![
2089                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2090                    14, 14, 15, 15
2091                ]
2092                .into_iter()
2093            ),
2094            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2095        );
2096        assert_eq!(
2097            antimodes(vec![3usize, 3, 3, 4].into_iter()),
2098            (vec![4], 1, 1)
2099        );
2100        assert_eq!(
2101            antimodes(vec![4usize, 3, 3, 3].into_iter()),
2102            (vec![4], 1, 1)
2103        );
2104        assert_eq!(
2105            antimodes(vec![1usize, 1, 2, 2].into_iter()),
2106            (vec![1, 2], 2, 2)
2107        );
2108        let vec: Vec<u32> = vec![];
2109        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2110    }
2111
2112    #[test]
2113    fn antimodes_floats() {
2114        assert_eq!(
2115            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2116            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2117        );
2118        assert_eq!(
2119            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2120            (vec![], 0, 0)
2121        );
2122        assert_eq!(
2123            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2124            (vec![3.0, 4.0], 2, 2)
2125        );
2126        assert_eq!(
2127            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2128            (vec![2.0], 1, 1)
2129        );
2130    }
2131
2132    #[test]
2133    fn test_custom_percentiles() {
2134        // Test with integers
2135        let mut unsorted: Unsorted<i32> = Unsorted::new();
2136        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
2137
2138        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2139        assert_eq!(result, vec![3, 6, 9]);
2140
2141        // Test with strings
2142        let mut str_data = Unsorted::new();
2143        str_data.extend(vec!["a", "b", "c", "d", "e"]);
2144        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2145        assert_eq!(result, vec!["a", "b", "c", "d"]);
2146
2147        // Test with chars
2148        let mut char_data = Unsorted::new();
2149        char_data.extend('a'..='e');
2150        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2151        assert_eq!(result, vec!['b', 'c', 'd']);
2152
2153        // Test with floats
2154        let mut float_data = Unsorted::new();
2155        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2156        let result = float_data
2157            .custom_percentiles(&[10, 30, 50, 70, 90])
2158            .unwrap();
2159        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2160
2161        // Test with empty percentiles array
2162        let result = float_data.custom_percentiles(&[]).unwrap();
2163        assert_eq!(result, Vec::<f64>::new());
2164
2165        // Test with duplicate percentiles
2166        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2167        assert_eq!(result, vec![5.5]);
2168
2169        // Test with extreme percentiles
2170        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2171        assert_eq!(result, vec![1.1, 9.9]);
2172
2173        // Test with unsorted percentiles
2174        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2175        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
2176
2177        // Test with single element
2178        let mut single = Unsorted::new();
2179        single.add(42);
2180        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2181        assert_eq!(result, vec![42, 42, 42]);
2182    }
2183
2184    #[test]
2185    fn quartiles_stream() {
2186        assert_eq!(
2187            quartiles(vec![3usize, 5, 7].into_iter()),
2188            Some((3., 5., 7.))
2189        );
2190        assert_eq!(
2191            quartiles(vec![3usize, 5, 7, 9].into_iter()),
2192            Some((4., 6., 8.))
2193        );
2194        assert_eq!(
2195            quartiles(vec![1usize, 2, 7, 11].into_iter()),
2196            Some((1.5, 4.5, 9.))
2197        );
2198        assert_eq!(
2199            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2200            Some((4., 7., 10.5))
2201        );
2202        assert_eq!(
2203            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2204            Some((2., 3., 9.))
2205        );
2206        assert_eq!(
2207            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2208            Some((5., 8., 12.))
2209        );
2210        assert_eq!(
2211            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2212            Some((2., 6., 10.))
2213        );
2214        assert_eq!(
2215            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2216            Some((5., 9., 20.))
2217        );
2218        assert_eq!(
2219            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2220            Some((5., 6., 10.))
2221        );
2222    }
2223
2224    #[test]
2225    fn quartiles_floats() {
2226        assert_eq!(
2227            quartiles(vec![3_f64, 5., 7.].into_iter()),
2228            Some((3., 5., 7.))
2229        );
2230        assert_eq!(
2231            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2232            Some((4., 6., 8.))
2233        );
2234        assert_eq!(
2235            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2236            Some((4., 7., 10.5))
2237        );
2238        assert_eq!(
2239            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2240            Some((5., 8., 12.))
2241        );
2242        assert_eq!(
2243            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2244            Some((5., 9., 20.))
2245        );
2246    }
2247
2248    #[test]
2249    fn test_quartiles_zero_copy_small() {
2250        // Test edge cases for zero-copy quartiles
2251        let unsorted: Unsorted<i32> = Unsorted::new();
2252        assert_eq!(unsorted.quartiles_zero_copy(), None);
2253
2254        let mut unsorted = Unsorted::new();
2255        unsorted.extend(vec![1, 2]);
2256        assert_eq!(unsorted.quartiles_zero_copy(), None);
2257
2258        let mut unsorted = Unsorted::new();
2259        unsorted.extend(vec![1, 2, 3]);
2260        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2261
2262        // Test larger case
2263        let mut unsorted = Unsorted::new();
2264        unsorted.extend(vec![3, 5, 7, 9]);
2265        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2266    }
2267
2268    #[test]
2269    fn gini_empty() {
2270        let mut unsorted: Unsorted<i32> = Unsorted::new();
2271        assert_eq!(unsorted.gini(None), None);
2272        let empty_vec: Vec<i32> = vec![];
2273        assert_eq!(gini(empty_vec.into_iter(), None), None);
2274    }
2275
2276    #[test]
2277    fn gini_single_element() {
2278        let mut unsorted = Unsorted::new();
2279        unsorted.add(5);
2280        assert_eq!(unsorted.gini(None), Some(0.0));
2281        assert_eq!(gini(vec![5].into_iter(), None), Some(0.0));
2282    }
2283
2284    #[test]
2285    fn gini_perfect_equality() {
2286        // All values are the same - perfect equality, Gini = 0
2287        let mut unsorted = Unsorted::new();
2288        unsorted.extend(vec![10, 10, 10, 10, 10]);
2289        let result = unsorted.gini(None).unwrap();
2290        assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2291
2292        assert!((gini(vec![10, 10, 10, 10, 10].into_iter(), None).unwrap() - 0.0).abs() < 1e-10);
2293    }
2294
2295    #[test]
2296    fn gini_perfect_inequality() {
2297        // One value has everything, others have zero - perfect inequality
2298        // For [0, 0, 0, 0, 100], Gini should be close to 1
2299        let mut unsorted = Unsorted::new();
2300        unsorted.extend(vec![0, 0, 0, 0, 100]);
2301        let result = unsorted.gini(None).unwrap();
2302        // Perfect inequality should give Gini close to 1
2303        // For n=5, one value=100, others=0: G = (2*5*100)/(5*100) - 6/5 = 2 - 1.2 = 0.8
2304        assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2305    }
2306
2307    #[test]
2308    fn gini_stream() {
2309        // Test with known values
2310        // For [1, 2, 3, 4, 5]:
2311        // sum = 15
2312        // weighted_sum = 1*1 + 2*2 + 3*3 + 4*4 + 5*5 = 1 + 4 + 9 + 16 + 25 = 55
2313        // n = 5
2314        // G = (2 * 55) / (5 * 15) - 6/5 = 110/75 - 1.2 = 1.4667 - 1.2 = 0.2667
2315        let result = gini(vec![1usize, 2, 3, 4, 5].into_iter(), None).unwrap();
2316        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2317        assert!(
2318            (result - expected).abs() < 1e-10,
2319            "Expected {}, got {}",
2320            expected,
2321            result
2322        );
2323    }
2324
2325    #[test]
2326    fn gini_floats() {
2327        let mut unsorted = Unsorted::new();
2328        unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2329        let result = unsorted.gini(None).unwrap();
2330        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2331        assert!((result - expected).abs() < 1e-10);
2332
2333        assert!(
2334            (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter(), None).unwrap() - expected).abs()
2335                < 1e-10
2336        );
2337    }
2338
2339    #[test]
2340    fn gini_all_zeros() {
2341        // All zeros - sum is zero, Gini is undefined
2342        let mut unsorted = Unsorted::new();
2343        unsorted.extend(vec![0, 0, 0, 0]);
2344        assert_eq!(unsorted.gini(None), None);
2345        assert_eq!(gini(vec![0, 0, 0, 0].into_iter(), None), None);
2346    }
2347
2348    #[test]
2349    fn gini_negative_values() {
2350        // Test with negative values (mathematically valid)
2351        let mut unsorted = Unsorted::new();
2352        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2353        let result = unsorted.gini(None);
2354        // Sum is 0, so Gini is undefined
2355        assert_eq!(result, None);
2356
2357        // Test with negative values that don't sum to zero
2358        let mut unsorted = Unsorted::new();
2359        unsorted.extend(vec![-2, -1, 0, 1, 2]);
2360        let result = unsorted.gini(None);
2361        // Sum is 0, so Gini is undefined
2362        assert_eq!(result, None);
2363
2364        // Test with values that sum to non-zero
2365        let mut unsorted = Unsorted::new();
2366        unsorted.extend(vec![-1, 0, 1, 2, 3]);
2367        let result = unsorted.gini(None);
2368        assert!(result.is_some());
2369    }
2370
2371    #[test]
2372    fn gini_known_cases() {
2373        // Test case: [1, 1, 1, 1, 1] - perfect equality
2374        let mut unsorted = Unsorted::new();
2375        unsorted.extend(vec![1, 1, 1, 1, 1]);
2376        let result = unsorted.gini(None).unwrap();
2377        assert!((result - 0.0).abs() < 1e-10);
2378
2379        // Test case: [0, 0, 0, 0, 1] - high inequality
2380        let mut unsorted = Unsorted::new();
2381        unsorted.extend(vec![0, 0, 0, 0, 1]);
2382        let result = unsorted.gini(None).unwrap();
2383        // G = (2 * 5 * 1) / (5 * 1) - 6/5 = 2 - 1.2 = 0.8
2384        assert!((result - 0.8).abs() < 1e-10);
2385
2386        // Test case: [1, 2, 3] - moderate inequality
2387        let mut unsorted = Unsorted::new();
2388        unsorted.extend(vec![1, 2, 3]);
2389        let result = unsorted.gini(None).unwrap();
2390        // sum = 6, weighted_sum = 1*1 + 2*2 + 3*3 = 1 + 4 + 9 = 14
2391        // G = (2 * 14) / (3 * 6) - 4/3 = 28/18 - 4/3 = 1.5556 - 1.3333 = 0.2222
2392        let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2393        assert!((result - expected).abs() < 1e-10);
2394    }
2395
2396    #[test]
2397    fn gini_precalc_sum() {
2398        // Test with pre-calculated sum
2399        let mut unsorted = Unsorted::new();
2400        unsorted.extend(vec![1, 2, 3, 4, 5]);
2401        let precalc_sum = Some(15.0);
2402        let result = unsorted.gini(precalc_sum).unwrap();
2403        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2404        assert!((result - expected).abs() < 1e-10);
2405
2406        // Test that pre-calculated sum gives same result
2407        let mut unsorted2 = Unsorted::new();
2408        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2409        let result2 = unsorted2.gini(None).unwrap();
2410        assert!((result - result2).abs() < 1e-10);
2411    }
2412
2413    #[test]
2414    fn gini_large_dataset() {
2415        // Test with larger dataset to exercise parallel path
2416        let data: Vec<i32> = (1..=1000).collect();
2417        let result = gini(data.iter().copied(), None);
2418        assert!(result.is_some());
2419        let gini_val = result.unwrap();
2420        // For uniform distribution, Gini should be positive but not too high
2421        assert!(gini_val > 0.0 && gini_val < 0.5);
2422    }
2423
2424    #[test]
2425    fn gini_unsorted_vs_sorted() {
2426        // Test that sorting doesn't affect result
2427        let mut unsorted1 = Unsorted::new();
2428        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2429        let result1 = unsorted1.gini(None).unwrap();
2430
2431        let mut unsorted2 = Unsorted::new();
2432        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2433        let result2 = unsorted2.gini(None).unwrap();
2434
2435        assert!((result1 - result2).abs() < 1e-10);
2436    }
2437
2438    #[test]
2439    fn gini_small_values() {
2440        // Test with very small values
2441        let mut unsorted = Unsorted::new();
2442        unsorted.extend(vec![0.001, 0.002, 0.003, 0.004, 0.005]);
2443        let result = unsorted.gini(None);
2444        assert!(result.is_some());
2445        // Should be same as [1, 2, 3, 4, 5] scaled down
2446        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2447        assert!((result.unwrap() - expected).abs() < 1e-10);
2448    }
2449
2450    #[test]
2451    fn gini_large_values() {
2452        // Test with large values
2453        let mut unsorted = Unsorted::new();
2454        unsorted.extend(vec![1000, 2000, 3000, 4000, 5000]);
2455        let result = unsorted.gini(None);
2456        assert!(result.is_some());
2457        // Should be same as [1, 2, 3, 4, 5] scaled up
2458        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2459        assert!((result.unwrap() - expected).abs() < 1e-10);
2460    }
2461
2462    #[test]
2463    fn gini_two_elements() {
2464        // Test with exactly 2 elements
2465        let mut unsorted = Unsorted::new();
2466        unsorted.extend(vec![1, 2]);
2467        let result = unsorted.gini(None).unwrap();
2468        // For [1, 2]: sum=3, weighted_sum=1*1+2*2=5, n=2
2469        // G = (2*5)/(2*3) - 3/2 = 10/6 - 1.5 = 1.6667 - 1.5 = 0.1667
2470        let expected = (2.0 * 5.0) / (2.0 * 3.0) - 3.0 / 2.0;
2471        assert!((result - expected).abs() < 1e-10);
2472    }
2473
2474    #[test]
2475    fn gini_precalc_sum_zero() {
2476        // Test with pre-calculated sum of zero (should return None)
2477        let mut unsorted = Unsorted::new();
2478        unsorted.extend(vec![1, 2, 3, 4, 5]);
2479        let result = unsorted.gini(Some(0.0));
2480        assert_eq!(result, None);
2481    }
2482
2483    #[test]
2484    fn gini_precalc_sum_negative() {
2485        // Test with negative sum
2486        // The function only checks if sum == 0.0, not if sum < 0.0
2487        // So negative sums will still compute Gini (though mathematically questionable)
2488        let mut unsorted = Unsorted::new();
2489        unsorted.extend(vec![-5, -3, -1, 1, 3]);
2490        let result = unsorted.gini(None);
2491        // Sum is -5, function doesn't check for negative, so it computes Gini
2492        // This is technically invalid but the function allows it
2493        assert!(result.is_some());
2494    }
2495
2496    #[test]
2497    fn gini_different_types() {
2498        // Test with different integer types
2499        let mut unsorted_u32 = Unsorted::new();
2500        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2501        let result_u32 = unsorted_u32.gini(None).unwrap();
2502
2503        let mut unsorted_i64 = Unsorted::new();
2504        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2505        let result_i64 = unsorted_i64.gini(None).unwrap();
2506
2507        let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2508        assert!((result_u32 - expected).abs() < 1e-10);
2509        assert!((result_i64 - expected).abs() < 1e-10);
2510    }
2511
2512    #[test]
2513    fn gini_extreme_inequality() {
2514        // Test with extreme inequality: one very large value, many zeros
2515        let mut unsorted = Unsorted::new();
2516        unsorted.extend(vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 1000]);
2517        let result = unsorted.gini(None).unwrap();
2518        // For [0,0,0,0,0,0,0,0,0,1000]: sum=1000, weighted_sum=10*1000=10000, n=10
2519        // G = (2*10000)/(10*1000) - 11/10 = 20/10 - 1.1 = 2 - 1.1 = 0.9
2520        assert!((result - 0.9).abs() < 1e-10);
2521    }
2522
2523    #[test]
2524    fn gini_duplicate_values() {
2525        // Test with many duplicate values
2526        let mut unsorted = Unsorted::new();
2527        unsorted.extend(vec![1, 1, 1, 5, 5, 5, 10, 10, 10]);
2528        let result = unsorted.gini(None);
2529        assert!(result.is_some());
2530        // Should be between 0 and 1
2531        let gini_val = result.unwrap();
2532        assert!(gini_val >= 0.0 && gini_val <= 1.0);
2533    }
2534
2535    #[test]
2536    fn kurtosis_empty() {
2537        let mut unsorted: Unsorted<i32> = Unsorted::new();
2538        assert_eq!(unsorted.kurtosis(None, None), None);
2539        let empty_vec: Vec<i32> = vec![];
2540        assert_eq!(kurtosis(empty_vec.into_iter(), None, None), None);
2541    }
2542
2543    #[test]
2544    fn kurtosis_small() {
2545        // Need at least 4 elements
2546        let mut unsorted = Unsorted::new();
2547        unsorted.extend(vec![1, 2]);
2548        assert_eq!(unsorted.kurtosis(None, None), None);
2549
2550        let mut unsorted = Unsorted::new();
2551        unsorted.extend(vec![1, 2, 3]);
2552        assert_eq!(unsorted.kurtosis(None, None), None);
2553    }
2554
2555    #[test]
2556    fn kurtosis_normal_distribution() {
2557        // Normal distribution should have kurtosis close to 0
2558        let mut unsorted = Unsorted::new();
2559        unsorted.extend(vec![1, 2, 3, 4, 5]);
2560        let result = unsorted.kurtosis(None, None);
2561        assert!(result.is_some());
2562        // For small samples, kurtosis can vary significantly
2563    }
2564
2565    #[test]
2566    fn kurtosis_all_same() {
2567        // All same values - variance is 0, kurtosis undefined
2568        let mut unsorted = Unsorted::new();
2569        unsorted.extend(vec![5, 5, 5, 5]);
2570        assert_eq!(unsorted.kurtosis(None, None), None);
2571    }
2572
2573    #[test]
2574    fn kurtosis_stream() {
2575        let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter(), None, None);
2576        assert!(result.is_some());
2577    }
2578
2579    #[test]
2580    fn kurtosis_precalc_mean_variance() {
2581        // Test with pre-calculated mean and variance
2582        let mut unsorted = Unsorted::new();
2583        unsorted.extend(vec![1, 2, 3, 4, 5]);
2584
2585        // Calculate mean and variance manually
2586        let mean = 3.0f64;
2587        let variance = ((1.0f64 - 3.0).powi(2)
2588            + (2.0f64 - 3.0).powi(2)
2589            + (3.0f64 - 3.0).powi(2)
2590            + (4.0f64 - 3.0).powi(2)
2591            + (5.0f64 - 3.0).powi(2))
2592            / 5.0;
2593
2594        let result = unsorted.kurtosis(Some(mean), Some(variance));
2595        assert!(result.is_some());
2596
2597        // Test that pre-calculated values give same result
2598        let mut unsorted2 = Unsorted::new();
2599        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2600        let result2 = unsorted2.kurtosis(None, None);
2601        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2602    }
2603
2604    #[test]
2605    fn kurtosis_precalc_mean_only() {
2606        // Test with pre-calculated mean only
2607        let mut unsorted = Unsorted::new();
2608        unsorted.extend(vec![1, 2, 3, 4, 5]);
2609        let mean = 3.0f64;
2610
2611        let result = unsorted.kurtosis(Some(mean), None);
2612        assert!(result.is_some());
2613
2614        // Test that pre-calculated mean gives same result
2615        let mut unsorted2 = Unsorted::new();
2616        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2617        let result2 = unsorted2.kurtosis(None, None);
2618        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2619    }
2620
2621    #[test]
2622    fn kurtosis_precalc_variance_only() {
2623        // Test with pre-calculated variance only
2624        let mut unsorted = Unsorted::new();
2625        unsorted.extend(vec![1, 2, 3, 4, 5]);
2626        let variance = ((1.0f64 - 3.0).powi(2)
2627            + (2.0f64 - 3.0).powi(2)
2628            + (3.0f64 - 3.0).powi(2)
2629            + (4.0f64 - 3.0).powi(2)
2630            + (5.0f64 - 3.0).powi(2))
2631            / 5.0;
2632
2633        let result = unsorted.kurtosis(None, Some(variance));
2634        assert!(result.is_some());
2635
2636        // Test that pre-calculated variance gives same result
2637        let mut unsorted2 = Unsorted::new();
2638        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2639        let result2 = unsorted2.kurtosis(None, None);
2640        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2641    }
2642
2643    #[test]
2644    fn kurtosis_exact_calculation() {
2645        // Test with exact calculation for [1, 2, 3, 4]
2646        // Mean = 2.5
2647        // Variance = ((1-2.5)^2 + (2-2.5)^2 + (3-2.5)^2 + (4-2.5)^2) / 4 = (2.25 + 0.25 + 0.25 + 2.25) / 4 = 1.25
2648        // Variance^2 = 1.5625
2649        // Fourth powers: (1-2.5)^4 + (2-2.5)^4 + (3-2.5)^4 + (4-2.5)^4 = 5.0625 + 0.0625 + 0.0625 + 5.0625 = 10.25
2650        // n = 4
2651        // Kurtosis = (4*5*10.25) / (3*2*1*1.5625) - 3*3*3/(2*1) = 205 / 9.375 - 13.5 = 21.8667 - 13.5 = 8.3667
2652        let mut unsorted = Unsorted::new();
2653        unsorted.extend(vec![1, 2, 3, 4]);
2654        let result = unsorted.kurtosis(None, None).unwrap();
2655        // For small samples, kurtosis can be very high
2656        assert!(result.is_finite());
2657    }
2658
2659    #[test]
2660    fn kurtosis_uniform_distribution() {
2661        // Uniform distribution should have negative excess kurtosis
2662        let mut unsorted = Unsorted::new();
2663        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2664        let result = unsorted.kurtosis(None, None).unwrap();
2665        // Uniform distribution has excess kurtosis = -1.2
2666        // But for small samples, it can vary significantly
2667        assert!(result.is_finite());
2668    }
2669
2670    #[test]
2671    fn kurtosis_large_dataset() {
2672        // Test with larger dataset to exercise parallel path
2673        let data: Vec<i32> = (1..=1000).collect();
2674        let result = kurtosis(data.iter().copied(), None, None);
2675        assert!(result.is_some());
2676        let kurt_val = result.unwrap();
2677        assert!(kurt_val.is_finite());
2678    }
2679
2680    #[test]
2681    fn kurtosis_unsorted_vs_sorted() {
2682        // Test that sorting doesn't affect result
2683        let mut unsorted1 = Unsorted::new();
2684        unsorted1.extend(vec![5, 2, 8, 1, 9, 3, 7, 4, 6]);
2685        let result1 = unsorted1.kurtosis(None, None).unwrap();
2686
2687        let mut unsorted2 = Unsorted::new();
2688        unsorted2.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
2689        let result2 = unsorted2.kurtosis(None, None).unwrap();
2690
2691        assert!((result1 - result2).abs() < 1e-10);
2692    }
2693
2694    #[test]
2695    fn kurtosis_minimum_size() {
2696        // Test with exactly 4 elements (minimum required)
2697        let mut unsorted = Unsorted::new();
2698        unsorted.extend(vec![1, 2, 3, 4]);
2699        let result = unsorted.kurtosis(None, None);
2700        assert!(result.is_some());
2701        assert!(result.unwrap().is_finite());
2702    }
2703
2704    #[test]
2705    fn kurtosis_heavy_tailed() {
2706        // Test with heavy-tailed distribution (outliers)
2707        let mut unsorted = Unsorted::new();
2708        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 100]);
2709        let result = unsorted.kurtosis(None, None).unwrap();
2710        // Heavy tails should give positive excess kurtosis
2711        assert!(result.is_finite());
2712        // With an outlier, kurtosis should be positive
2713        assert!(result > -10.0); // Allow some variance but should be reasonable
2714    }
2715
2716    #[test]
2717    fn kurtosis_light_tailed() {
2718        // Test with light-tailed distribution (values close together)
2719        let mut unsorted = Unsorted::new();
2720        unsorted.extend(vec![10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
2721        let result = unsorted.kurtosis(None, None).unwrap();
2722        // Light tails might give negative excess kurtosis
2723        assert!(result.is_finite());
2724    }
2725
2726    #[test]
2727    fn kurtosis_small_variance() {
2728        // Test with very small variance (values very close together)
2729        let mut unsorted = Unsorted::new();
2730        unsorted.extend(vec![10.0, 10.001, 10.002, 10.003, 10.004]);
2731        let result = unsorted.kurtosis(None, None);
2732        // Should still compute (variance is very small but non-zero)
2733        assert!(result.is_some());
2734        assert!(result.unwrap().is_finite());
2735    }
2736
2737    #[test]
2738    fn kurtosis_precalc_zero_variance() {
2739        // Test with pre-calculated variance of zero (should return None)
2740        let mut unsorted = Unsorted::new();
2741        unsorted.extend(vec![1, 2, 3, 4, 5]);
2742        let result = unsorted.kurtosis(None, Some(0.0));
2743        assert_eq!(result, None);
2744    }
2745
2746    #[test]
2747    fn kurtosis_precalc_negative_variance() {
2748        // Test with negative variance (invalid, but should handle gracefully)
2749        let mut unsorted = Unsorted::new();
2750        unsorted.extend(vec![1, 2, 3, 4, 5]);
2751        // Negative variance is invalid, but function should handle it
2752        let result = unsorted.kurtosis(None, Some(-1.0));
2753        // Should either return None or handle it gracefully
2754        // The function computes variance_sq = variance^2, so negative becomes positive
2755        // But this is invalid input, so behavior may vary
2756        // For now, just check it doesn't panic
2757        let _ = result;
2758    }
2759
2760    #[test]
2761    fn kurtosis_different_types() {
2762        // Test with different integer types
2763        let mut unsorted_u32 = Unsorted::new();
2764        unsorted_u32.extend(vec![1u32, 2, 3, 4, 5]);
2765        let result_u32 = unsorted_u32.kurtosis(None, None).unwrap();
2766
2767        let mut unsorted_i64 = Unsorted::new();
2768        unsorted_i64.extend(vec![1i64, 2, 3, 4, 5]);
2769        let result_i64 = unsorted_i64.kurtosis(None, None).unwrap();
2770
2771        assert!((result_u32 - result_i64).abs() < 1e-10);
2772    }
2773
2774    #[test]
2775    fn kurtosis_floating_point_precision() {
2776        // Test floating point precision
2777        let mut unsorted = Unsorted::new();
2778        unsorted.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
2779        let result = unsorted.kurtosis(None, None);
2780        assert!(result.is_some());
2781        assert!(result.unwrap().is_finite());
2782    }
2783
2784    #[test]
2785    fn kurtosis_negative_values() {
2786        // Test with negative values
2787        let mut unsorted = Unsorted::new();
2788        unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2789        let result = unsorted.kurtosis(None, None);
2790        assert!(result.is_some());
2791        assert!(result.unwrap().is_finite());
2792    }
2793
2794    #[test]
2795    fn kurtosis_mixed_positive_negative() {
2796        // Test with mixed positive and negative values
2797        let mut unsorted = Unsorted::new();
2798        unsorted.extend(vec![-10, -5, 0, 5, 10]);
2799        let result = unsorted.kurtosis(None, None);
2800        assert!(result.is_some());
2801        assert!(result.unwrap().is_finite());
2802    }
2803
2804    #[test]
2805    fn kurtosis_duplicate_values() {
2806        // Test with duplicate values (but not all same)
2807        let mut unsorted = Unsorted::new();
2808        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2809        let result = unsorted.kurtosis(None, None);
2810        assert!(result.is_some());
2811        assert!(result.unwrap().is_finite());
2812    }
2813
2814    #[test]
2815    fn kurtosis_precalc_mean_wrong() {
2816        // Test that wrong pre-calculated mean gives wrong result
2817        let mut unsorted1 = Unsorted::new();
2818        unsorted1.extend(vec![1, 2, 3, 4, 5]);
2819        let correct_result = unsorted1.kurtosis(None, None).unwrap();
2820
2821        let mut unsorted2 = Unsorted::new();
2822        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2823        let wrong_mean = 10.0; // Wrong mean
2824        let wrong_result = unsorted2.kurtosis(Some(wrong_mean), None).unwrap();
2825
2826        // Results should be different
2827        assert!((correct_result - wrong_result).abs() > 1e-5);
2828    }
2829
2830    #[test]
2831    fn percentile_rank_empty() {
2832        let mut unsorted: Unsorted<i32> = Unsorted::new();
2833        assert_eq!(unsorted.percentile_rank(5), None);
2834        let empty_vec: Vec<i32> = vec![];
2835        assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2836    }
2837
2838    #[test]
2839    fn percentile_rank_basic() {
2840        let mut unsorted = Unsorted::new();
2841        unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2842
2843        // Value less than all
2844        assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2845
2846        // Value greater than all
2847        assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2848
2849        // Median (5) should be around 50th percentile
2850        let rank = unsorted.percentile_rank(5).unwrap();
2851        assert!((rank - 50.0).abs() < 1.0);
2852
2853        // First value should be at 10th percentile
2854        let rank = unsorted.percentile_rank(1).unwrap();
2855        assert!((rank - 10.0).abs() < 1.0);
2856    }
2857
2858    #[test]
2859    fn percentile_rank_duplicates() {
2860        let mut unsorted = Unsorted::new();
2861        unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2862
2863        // Value 2 appears twice, should be at 40th percentile (4 values <= 2)
2864        let rank = unsorted.percentile_rank(2).unwrap();
2865        assert!((rank - 40.0).abs() < 1.0);
2866    }
2867
2868    #[test]
2869    fn percentile_rank_stream() {
2870        let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2871        assert_eq!(result, Some(60.0)); // 3 out of 5 values <= 3
2872    }
2873
2874    #[test]
2875    fn atkinson_empty() {
2876        let mut unsorted: Unsorted<i32> = Unsorted::new();
2877        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2878        let empty_vec: Vec<i32> = vec![];
2879        assert_eq!(atkinson(empty_vec.into_iter(), 1.0, None, None), None);
2880    }
2881
2882    #[test]
2883    fn atkinson_single_element() {
2884        let mut unsorted = Unsorted::new();
2885        unsorted.add(5);
2886        assert_eq!(unsorted.atkinson(1.0, None, None), Some(0.0));
2887        assert_eq!(atkinson(vec![5].into_iter(), 1.0, None, None), Some(0.0));
2888    }
2889
2890    #[test]
2891    fn atkinson_perfect_equality() {
2892        // All values the same - perfect equality, Atkinson = 0
2893        let mut unsorted = Unsorted::new();
2894        unsorted.extend(vec![10, 10, 10, 10, 10]);
2895        let result = unsorted.atkinson(1.0, None, None).unwrap();
2896        assert!((result - 0.0).abs() < 1e-10);
2897    }
2898
2899    #[test]
2900    fn atkinson_epsilon_zero() {
2901        // Epsilon = 0 means no inequality aversion, should return 0
2902        let mut unsorted = Unsorted::new();
2903        unsorted.extend(vec![1, 2, 3, 4, 5]);
2904        let result = unsorted.atkinson(0.0, None, None).unwrap();
2905        assert!((result - 0.0).abs() < 1e-10);
2906    }
2907
2908    #[test]
2909    fn atkinson_epsilon_one() {
2910        // Epsilon = 1 uses geometric mean
2911        let mut unsorted = Unsorted::new();
2912        unsorted.extend(vec![1, 2, 3, 4, 5]);
2913        let result = unsorted.atkinson(1.0, None, None);
2914        assert!(result.is_some());
2915    }
2916
2917    #[test]
2918    fn atkinson_negative_epsilon() {
2919        let mut unsorted = Unsorted::new();
2920        unsorted.extend(vec![1, 2, 3, 4, 5]);
2921        assert_eq!(unsorted.atkinson(-1.0, None, None), None);
2922    }
2923
2924    #[test]
2925    fn atkinson_zero_mean() {
2926        // If mean is zero, Atkinson is undefined
2927        let mut unsorted = Unsorted::new();
2928        unsorted.extend(vec![0, 0, 0, 0]);
2929        assert_eq!(unsorted.atkinson(1.0, None, None), None);
2930    }
2931
2932    #[test]
2933    fn atkinson_stream() {
2934        let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0, None, None);
2935        assert!(result.is_some());
2936    }
2937
2938    #[test]
2939    fn atkinson_precalc_mean_geometric_sum() {
2940        // Test with pre-calculated mean and geometric_sum
2941        let mut unsorted = Unsorted::new();
2942        unsorted.extend(vec![1, 2, 3, 4, 5]);
2943
2944        // Calculate mean and geometric_sum manually
2945        let mean = 3.0f64;
2946        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2947
2948        let result = unsorted.atkinson(1.0, Some(mean), Some(geometric_sum));
2949        assert!(result.is_some());
2950
2951        // Test that pre-calculated values give same result
2952        let mut unsorted2 = Unsorted::new();
2953        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2954        let result2 = unsorted2.atkinson(1.0, None, None);
2955        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2956    }
2957
2958    #[test]
2959    fn atkinson_precalc_mean_only() {
2960        // Test with pre-calculated mean only
2961        let mut unsorted = Unsorted::new();
2962        unsorted.extend(vec![1, 2, 3, 4, 5]);
2963        let mean = 3.0f64;
2964
2965        let result = unsorted.atkinson(1.0, Some(mean), None);
2966        assert!(result.is_some());
2967
2968        // Test that pre-calculated mean gives same result
2969        let mut unsorted2 = Unsorted::new();
2970        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2971        let result2 = unsorted2.atkinson(1.0, None, None);
2972        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2973    }
2974
2975    #[test]
2976    fn atkinson_precalc_geometric_sum_only() {
2977        // Test with pre-calculated geometric_sum only
2978        let mut unsorted = Unsorted::new();
2979        unsorted.extend(vec![1, 2, 3, 4, 5]);
2980        let geometric_sum = 1.0f64.ln() + 2.0f64.ln() + 3.0f64.ln() + 4.0f64.ln() + 5.0f64.ln();
2981
2982        let result = unsorted.atkinson(1.0, None, Some(geometric_sum));
2983        assert!(result.is_some());
2984
2985        // Test that pre-calculated geometric_sum gives same result
2986        let mut unsorted2 = Unsorted::new();
2987        unsorted2.extend(vec![1, 2, 3, 4, 5]);
2988        let result2 = unsorted2.atkinson(1.0, None, None);
2989        assert!((result.unwrap() - result2.unwrap()).abs() < 1e-10);
2990    }
2991}
2992
2993#[cfg(test)]
2994mod bench {
2995    use super::*;
2996    use std::time::Instant;
2997
2998    #[test]
2999    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
3000    fn comprehensive_quartiles_benchmark() {
3001        // Test a wide range of data sizes
3002        let data_sizes = vec![
3003            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
3004        ];
3005
3006        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
3007
3008        for size in data_sizes {
3009            println!("--- Testing with {} elements ---", size);
3010
3011            // Test different data patterns
3012            let test_patterns = vec![
3013                ("Random", generate_random_data(size)),
3014                ("Reverse Sorted", {
3015                    let mut v = Vec::with_capacity(size);
3016                    for x in (0..size).rev() {
3017                        v.push(x as i32);
3018                    }
3019                    v
3020                }),
3021                ("Already Sorted", {
3022                    let mut v = Vec::with_capacity(size);
3023                    for x in 0..size {
3024                        v.push(x as i32);
3025                    }
3026                    v
3027                }),
3028                ("Many Duplicates", {
3029                    // Create a vector with just a few distinct values repeated many times
3030                    let mut v = Vec::with_capacity(size);
3031                    let chunk_size = size / 100;
3032                    for i in 0..100 {
3033                        v.extend(std::iter::repeat_n(i, chunk_size));
3034                    }
3035                    // Add any remaining elements
3036                    v.extend(std::iter::repeat_n(0, size - v.len()));
3037                    v
3038                }),
3039            ];
3040
3041            for (pattern_name, test_data) in test_patterns {
3042                println!("\n  Pattern: {}", pattern_name);
3043
3044                // Benchmark sorting-based approach
3045                let mut unsorted1 = Unsorted::new();
3046                unsorted1.extend(test_data.clone());
3047
3048                let start = Instant::now();
3049                let result_sorted = unsorted1.quartiles();
3050                let sorted_time = start.elapsed();
3051
3052                // Benchmark selection-based approach (with copying)
3053                let mut unsorted2 = Unsorted::new();
3054                unsorted2.extend(test_data.clone());
3055
3056                let start = Instant::now();
3057                let result_selection = unsorted2.quartiles_with_selection();
3058                let selection_time = start.elapsed();
3059
3060                // Benchmark zero-copy selection-based approach
3061                let mut unsorted3 = Unsorted::new();
3062                unsorted3.extend(test_data);
3063
3064                let start = Instant::now();
3065                let result_zero_copy = unsorted3.quartiles_zero_copy();
3066                let zero_copy_time = start.elapsed();
3067
3068                // Verify results are the same
3069                assert_eq!(result_sorted, result_selection);
3070                assert_eq!(result_sorted, result_zero_copy);
3071
3072                let selection_speedup =
3073                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
3074                let zero_copy_speedup =
3075                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
3076
3077                println!("    Sorting:       {:>12?}", sorted_time);
3078                println!(
3079                    "    Selection:     {:>12?} (speedup: {:.2}x)",
3080                    selection_time, selection_speedup
3081                );
3082                println!(
3083                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
3084                    zero_copy_time, zero_copy_speedup
3085                );
3086
3087                let best_algorithm =
3088                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
3089                        "ZERO-COPY"
3090                    } else if selection_speedup > 1.0 {
3091                        "SELECTION"
3092                    } else {
3093                        "SORTING"
3094                    };
3095                println!("    Best: {}", best_algorithm);
3096            }
3097
3098            println!(); // Add blank line between sizes
3099        }
3100    }
3101
3102    // Generate random data for benchmarking
3103    fn generate_random_data(size: usize) -> Vec<i32> {
3104        // Simple LCG random number generator for reproducible results
3105        let mut rng = 1234567u64;
3106        let mut vec = Vec::with_capacity(size);
3107        for _ in 0..size {
3108            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
3109            vec.push((rng >> 16) as i32);
3110        }
3111        vec
3112    }
3113
3114    #[test]
3115    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
3116    fn find_selection_threshold() {
3117        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
3118
3119        // Binary search approach to find the threshold
3120        let mut found_threshold = None;
3121        let test_sizes = vec![
3122            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
3123            15_000_000, 20_000_000, 25_000_000, 30_000_000,
3124        ];
3125
3126        for size in test_sizes {
3127            println!("Testing size: {}", size);
3128
3129            // Use random data as it's most representative of real-world scenarios
3130            let test_data = generate_random_data(size);
3131
3132            // Run multiple iterations to get average performance
3133            let iterations = 3;
3134            let mut sorting_total = 0u128;
3135            let mut selection_total = 0u128;
3136            let mut zero_copy_total = 0u128;
3137
3138            for i in 0..iterations {
3139                println!("  Iteration {}/{}", i + 1, iterations);
3140
3141                // Sorting approach
3142                let mut unsorted1 = Unsorted::new();
3143                unsorted1.extend(test_data.clone());
3144
3145                let start = Instant::now();
3146                let _result_sorted = unsorted1.quartiles();
3147                sorting_total += start.elapsed().as_nanos();
3148
3149                // Selection approach (with copying)
3150                let mut unsorted2 = Unsorted::new();
3151                unsorted2.extend(test_data.clone());
3152
3153                let start = Instant::now();
3154                let _result_selection = unsorted2.quartiles_with_selection();
3155                selection_total += start.elapsed().as_nanos();
3156
3157                // Zero-copy selection approach
3158                let mut unsorted3 = Unsorted::new();
3159                unsorted3.extend(test_data.clone());
3160
3161                let start = Instant::now();
3162                let _result_zero_copy = unsorted3.quartiles_zero_copy();
3163                zero_copy_total += start.elapsed().as_nanos();
3164            }
3165
3166            let avg_sorting = sorting_total / iterations as u128;
3167            let avg_selection = selection_total / iterations as u128;
3168            let avg_zero_copy = zero_copy_total / iterations as u128;
3169            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
3170            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
3171
3172            println!(
3173                "  Average sorting:    {:>12.2}ms",
3174                avg_sorting as f64 / 1_000_000.0
3175            );
3176            println!(
3177                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
3178                avg_selection as f64 / 1_000_000.0,
3179                selection_speedup
3180            );
3181            println!(
3182                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
3183                avg_zero_copy as f64 / 1_000_000.0,
3184                zero_copy_speedup
3185            );
3186
3187            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
3188                found_threshold = Some(size);
3189                let best_method = if zero_copy_speedup > selection_speedup {
3190                    "Zero-copy"
3191                } else {
3192                    "Selection"
3193                };
3194                println!(
3195                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
3196                    best_method, size
3197                );
3198            }
3199
3200            println!();
3201        }
3202
3203        match found_threshold {
3204            Some(threshold) => println!(
3205                "🎯 Selection algorithm becomes faster at approximately {} elements",
3206                threshold
3207            ),
3208            None => println!("❌ Selection algorithm did not become faster in the tested range"),
3209        }
3210    }
3211
3212    #[test]
3213    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
3214    fn benchmark_different_data_types() {
3215        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
3216
3217        let size = 5_000_000; // Use a large size where differences might be visible
3218
3219        // Test with f64 (floating point)
3220        println!("Testing with f64 data:");
3221        let float_data: Vec<f64> = generate_random_data(size)
3222            .into_iter()
3223            .map(|x| x as f64 / 1000.0)
3224            .collect();
3225
3226        let mut unsorted1 = Unsorted::new();
3227        unsorted1.extend(float_data.clone());
3228        let start = Instant::now();
3229        let _result = unsorted1.quartiles();
3230        let sorting_time = start.elapsed();
3231
3232        let mut unsorted2 = Unsorted::new();
3233        unsorted2.extend(float_data.clone());
3234        let start = Instant::now();
3235        let _result = unsorted2.quartiles_with_selection();
3236        let selection_time = start.elapsed();
3237
3238        let mut unsorted3 = Unsorted::new();
3239        unsorted3.extend(float_data);
3240        let start = Instant::now();
3241        let _result = unsorted3.quartiles_zero_copy();
3242        let zero_copy_time = start.elapsed();
3243
3244        println!("  Sorting:    {:?}", sorting_time);
3245        println!("  Selection:  {:?}", selection_time);
3246        println!("  Zero-copy:  {:?}", zero_copy_time);
3247        println!(
3248            "  Selection Speedup:  {:.2}x",
3249            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3250        );
3251        println!(
3252            "  Zero-copy Speedup:  {:.2}x\n",
3253            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3254        );
3255
3256        // Test with i64 (larger integers)
3257        println!("Testing with i64 data:");
3258        let int64_data: Vec<i64> = generate_random_data(size)
3259            .into_iter()
3260            .map(|x| x as i64 * 1000)
3261            .collect();
3262
3263        let mut unsorted1 = Unsorted::new();
3264        unsorted1.extend(int64_data.clone());
3265        let start = Instant::now();
3266        let _result = unsorted1.quartiles();
3267        let sorting_time = start.elapsed();
3268
3269        let mut unsorted2 = Unsorted::new();
3270        unsorted2.extend(int64_data.clone());
3271        let start = Instant::now();
3272        let _result = unsorted2.quartiles_with_selection();
3273        let selection_time = start.elapsed();
3274
3275        let mut unsorted3 = Unsorted::new();
3276        unsorted3.extend(int64_data);
3277        let start = Instant::now();
3278        let _result = unsorted3.quartiles_zero_copy();
3279        let zero_copy_time = start.elapsed();
3280
3281        println!("  Sorting:    {:?}", sorting_time);
3282        println!("  Selection:  {:?}", selection_time);
3283        println!("  Zero-copy:  {:?}", zero_copy_time);
3284        println!(
3285            "  Selection Speedup:  {:.2}x",
3286            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
3287        );
3288        println!(
3289            "  Zero-copy Speedup:  {:.2}x",
3290            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
3291        );
3292    }
3293}
stats/unsorted.rs

stats/
unsorted.rs