stats/
unsorted.rs

1use num_traits::ToPrimitive;
2use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12/// Compute the exact median on a stream of data.
13///
14/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
15pub fn median<I>(it: I) -> Option<f64>
16where
17    I: Iterator,
18    <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20    it.collect::<Unsorted<_>>().median()
21}
22
23/// Compute the median absolute deviation (MAD) on a stream of data.
24pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26    I: Iterator,
27    <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29    it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32/// Compute the exact 1-, 2-, and 3-quartiles (Q1, Q2 a.k.a. median, and Q3) on a stream of data.
33///
34/// (This has time complexity `O(n log n)` and space complexity `O(n)`.)
35pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37    I: Iterator,
38    <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40    it.collect::<Unsorted<_>>().quartiles()
41}
42
43/// Compute the exact mode on a stream of data.
44///
45/// (This has time complexity `O(nlogn)` and space complexity `O(n)`.)
46///
47/// If the data does not have a mode, then `None` is returned.
48pub fn mode<T, I>(it: I) -> Option<T>
49where
50    T: PartialOrd + Clone,
51    I: Iterator<Item = T>,
52{
53    it.collect::<Unsorted<T>>().mode()
54}
55
56/// Compute the modes on a stream of data.
57///
58/// If there is a single mode, then only that value is returned in the `Vec`
59/// however, if there are multiple values tied for occurring the most amount of times
60/// those values are returned.
61///
62/// ## Example
63/// ```
64/// use stats;
65///
66/// let vals = vec![1, 1, 2, 2, 3];
67///
68/// assert_eq!(stats::modes(vals.into_iter()), (vec![1, 2], 2, 2));
69/// ```
70/// This has time complexity `O(n)`
71///
72/// If the data does not have a mode, then an empty `Vec` is returned.
73pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75    T: PartialOrd + Clone,
76    I: Iterator<Item = T>,
77{
78    it.collect::<Unsorted<T>>().modes()
79}
80
81/// Compute the antimodes on a stream of data.
82///
83/// Antimode is the least frequent non-zero score.
84///
85/// If there is a single antimode, then only that value is returned in the `Vec`
86/// however, if there are multiple values tied for occurring the least amount of times
87/// those values are returned.
88///
89/// Only the first 10 antimodes are returned to prevent returning the whole set
90/// when cardinality = number of records (i.e. all unique values).
91///
92/// ## Example
93/// ```
94/// use stats;
95///
96/// let vals = vec![1, 1, 2, 2, 3];
97///
98/// assert_eq!(stats::antimodes(vals.into_iter()), (vec![3], 1, 1));
99/// ```
100/// This has time complexity `O(n)`
101///
102/// If the data does not have an antimode, then an empty `Vec` is returned.
103pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105    T: PartialOrd + Clone,
106    I: Iterator<Item = T>,
107{
108    let (antimodes_result, antimodes_count, antimodes_occurrences) =
109        it.collect::<Unsorted<T>>().antimodes();
110    (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113fn median_on_sorted<T>(data: &[T]) -> Option<f64>
114where
115    T: PartialOrd + ToPrimitive,
116{
117    Some(match data.len() {
118        // Empty slice case - return None early
119        0 => return None,
120        // Single element case - return that element converted to f64
121        1 => data.first()?.to_f64()?,
122        // Even length case - average the two middle elements
123        len if len.is_multiple_of(2) => {
124            let idx = len / 2;
125            // Safety: we know these indices are valid because we checked len is even and non-zero,
126            // so idx-1 and idx are valid indices into data
127            let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
128            let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
129            f64::midpoint(v1, v2)
130        }
131        // Odd length case - return the middle element
132        // Safety: we know the index is within bounds
133        len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
134    })
135}
136
137fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
138where
139    T: Sync + PartialOrd + ToPrimitive,
140{
141    if data.is_empty() {
142        return None;
143    }
144    let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
145
146    // Use adaptive parallel processing based on data size
147    let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
148        // Sequential processing for small datasets
149        let mut vec = Vec::with_capacity(data.len());
150        for x in data {
151            vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
152        }
153        vec
154    } else {
155        // Parallel processing for large datasets
156        data.par_iter()
157            .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
158            .collect()
159    };
160
161    // Use adaptive sorting based on size
162    if abs_diff_vec.len() < PARALLEL_THRESHOLD {
163        abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
164    } else {
165        abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
166    }
167    median_on_sorted(&abs_diff_vec)
168}
169
170/// Selection algorithm to find the k-th smallest element in O(n) average time.
171/// This is an implementation of quickselect algorithm.
172fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
173where
174    T: PartialOrd,
175{
176    if data.is_empty() || k >= data.len() {
177        return None;
178    }
179
180    let mut left = 0;
181    let mut right = data.len() - 1;
182
183    loop {
184        if left == right {
185            return Some(&data[left].0);
186        }
187
188        // Use median-of-three pivot selection for better performance
189        let pivot_idx = median_of_three_pivot(data, left, right);
190        let pivot_idx = partition(data, left, right, pivot_idx);
191
192        match k.cmp(&pivot_idx) {
193            std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
194            std::cmp::Ordering::Less => right = pivot_idx - 1,
195            std::cmp::Ordering::Greater => left = pivot_idx + 1,
196        }
197    }
198}
199
200/// Zero-copy selection algorithm that works with indices instead of copying data.
201/// This avoids the overhead of cloning data elements.
202fn quickselect_by_index<'a, T>(
203    data: &'a [Partial<T>],
204    indices: &mut [usize],
205    k: usize,
206) -> Option<&'a T>
207where
208    T: PartialOrd,
209{
210    if data.is_empty() || indices.is_empty() || k >= indices.len() {
211        return None;
212    }
213
214    let mut left = 0;
215    let mut right = indices.len() - 1;
216
217    loop {
218        if left == right {
219            return Some(&data[indices[left]].0);
220        }
221
222        // Use median-of-three pivot selection for better performance
223        let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
224        let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
225
226        match k.cmp(&pivot_idx) {
227            std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
228            std::cmp::Ordering::Less => right = pivot_idx - 1,
229            std::cmp::Ordering::Greater => left = pivot_idx + 1,
230        }
231    }
232}
233
234/// Select the median of three elements as pivot for better quickselect performance
235fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
236where
237    T: PartialOrd,
238{
239    let mid = left + (right - left) / 2;
240
241    if data[left] <= data[mid] {
242        if data[mid] <= data[right] {
243            mid
244        } else if data[left] <= data[right] {
245            right
246        } else {
247            left
248        }
249    } else if data[left] <= data[right] {
250        left
251    } else if data[mid] <= data[right] {
252        right
253    } else {
254        mid
255    }
256}
257
258/// Select the median of three elements as pivot using indices
259fn median_of_three_pivot_by_index<T>(
260    data: &[Partial<T>],
261    indices: &[usize],
262    left: usize,
263    right: usize,
264) -> usize
265where
266    T: PartialOrd,
267{
268    let mid = left + (right - left) / 2;
269
270    if data[indices[left]] <= data[indices[mid]] {
271        if data[indices[mid]] <= data[indices[right]] {
272            mid
273        } else if data[indices[left]] <= data[indices[right]] {
274            right
275        } else {
276            left
277        }
278    } else if data[indices[left]] <= data[indices[right]] {
279        left
280    } else if data[indices[mid]] <= data[indices[right]] {
281        right
282    } else {
283        mid
284    }
285}
286
287/// Partition function for quickselect
288fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
289where
290    T: PartialOrd,
291{
292    // Move pivot to end
293    data.swap(pivot_idx, right);
294    let mut store_idx = left;
295
296    // Move all elements smaller than pivot to the left
297    // Cache pivot position for better cache locality (access data[right] directly each time)
298    for i in left..right {
299        // Safety: i, store_idx, and right are guaranteed to be in bounds
300        // Compare directly with pivot at data[right] - compiler should optimize this access
301        if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
302            data.swap(i, store_idx);
303            store_idx += 1;
304        }
305    }
306
307    // Move pivot to its final place
308    data.swap(store_idx, right);
309    store_idx
310}
311
312/// Partition function for quickselect using indices
313fn partition_by_index<T>(
314    data: &[Partial<T>],
315    indices: &mut [usize],
316    left: usize,
317    right: usize,
318    pivot_idx: usize,
319) -> usize
320where
321    T: PartialOrd,
322{
323    // Move pivot to end
324    indices.swap(pivot_idx, right);
325    let mut store_idx = left;
326
327    // Cache pivot index and value for better cache locality
328    // This reduces indirection: indices[right] -> data[indices[right]]
329    // Safety: right is guaranteed to be in bounds
330    let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
331    let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
332
333    // Move all elements smaller than pivot to the left
334    let mut elem_idx: usize;
335    for i in left..right {
336        // Safety: i and store_idx are guaranteed to be in bounds
337        elem_idx = unsafe { *indices.get_unchecked(i) };
338        if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
339            indices.swap(i, store_idx);
340            store_idx += 1;
341        }
342    }
343
344    // Move pivot to its final place
345    indices.swap(store_idx, right);
346    store_idx
347}
348
349// This implementation follows Method 3 from https://en.wikipedia.org/wiki/Quartile
350// It divides data into quarters based on the length n = 4k + r where r is remainder.
351// For each remainder case (0,1,2,3), it uses different formulas to compute Q1, Q2, Q3.
352fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
353where
354    T: PartialOrd + ToPrimitive,
355{
356    let len = data.len();
357
358    // Early return for small arrays
359    match len {
360        0..=2 => return None,
361        3 => {
362            return Some(
363                // SAFETY: We know these indices are valid because len == 3
364                unsafe {
365                    (
366                        data.get_unchecked(0).0.to_f64()?,
367                        data.get_unchecked(1).0.to_f64()?,
368                        data.get_unchecked(2).0.to_f64()?,
369                    )
370                },
371            );
372        }
373        _ => {}
374    }
375
376    // Calculate k and remainder in one division
377    let k = len / 4;
378    let remainder = len % 4;
379
380    // SAFETY: All index calculations below are guaranteed to be in bounds
381    // because we've verified len >= 4 above and k is len/4
382    unsafe {
383        Some(match remainder {
384            0 => {
385                // Let data = {x_i}_{i=0..4k} where k is positive integer.
386                // Median q2 = (x_{2k-1} + x_{2k}) / 2.
387                // If we divide data into two parts {x_i < q2} as L and
388                // {x_i > q2} as R, #L == #R == 2k holds true. Thus,
389                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k-1} + x_{3k}) / 2.
390                // =============
391                // Simply put: Length is multiple of 4 (4k)
392                // q1 = (x_{k-1} + x_k) / 2
393                // q2 = (x_{2k-1} + x_{2k}) / 2
394                // q3 = (x_{3k-1} + x_{3k}) / 2
395                let q1 = f64::midpoint(
396                    data.get_unchecked(k - 1).0.to_f64()?,
397                    data.get_unchecked(k).0.to_f64()?,
398                );
399                let q2 = f64::midpoint(
400                    data.get_unchecked(2 * k - 1).0.to_f64()?,
401                    data.get_unchecked(2 * k).0.to_f64()?,
402                );
403                let q3 = f64::midpoint(
404                    data.get_unchecked(3 * k - 1).0.to_f64()?,
405                    data.get_unchecked(3 * k).0.to_f64()?,
406                );
407                (q1, q2, q3)
408            }
409            1 => {
410                // Let data = {x_i}_{i=0..4k+1} where k is positive integer.
411                // Median q2 = x_{2k}.
412                // If we divide data other than q2 into two parts {x_i < q2}
413                // as L and {x_i > q2} as R, #L == #R == 2k holds true. Thus,
414                // q1 = (x_{k-1} + x_{k}) / 2 and q3 = (x_{3k} + x_{3k+1}) / 2.
415                // =============
416                // Simply put: Length is 4k + 1
417                // q1 = (x_{k-1} + x_k) / 2
418                // q2 = x_{2k}
419                // q3 = (x_{3k} + x_{3k+1}) / 2
420                let q1 = f64::midpoint(
421                    data.get_unchecked(k - 1).0.to_f64()?,
422                    data.get_unchecked(k).0.to_f64()?,
423                );
424                let q2 = data.get_unchecked(2 * k).0.to_f64()?;
425                let q3 = f64::midpoint(
426                    data.get_unchecked(3 * k).0.to_f64()?,
427                    data.get_unchecked(3 * k + 1).0.to_f64()?,
428                );
429                (q1, q2, q3)
430            }
431            2 => {
432                // Let data = {x_i}_{i=0..4k+2} where k is positive integer.
433                // Median q2 = (x_{(2k+1)-1} + x_{2k+1}) / 2.
434                // If we divide data into two parts {x_i < q2} as L and
435                // {x_i > q2} as R, it's true that #L == #R == 2k+1.
436                // Thus, q1 = x_{k} and q3 = x_{3k+1}.
437                // =============
438                // Simply put: Length is 4k + 2
439                // q1 = x_k
440                // q2 = (x_{2k} + x_{2k+1}) / 2
441                // q3 = x_{3k+1}
442                let q1 = data.get_unchecked(k).0.to_f64()?;
443                let q2 = f64::midpoint(
444                    data.get_unchecked(2 * k).0.to_f64()?,
445                    data.get_unchecked(2 * k + 1).0.to_f64()?,
446                );
447                let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
448                (q1, q2, q3)
449            }
450            _ => {
451                // Let data = {x_i}_{i=0..4k+3} where k is positive integer.
452                // Median q2 = x_{2k+1}.
453                // If we divide data other than q2 into two parts {x_i < q2}
454                // as L and {x_i > q2} as R, #L == #R == 2k+1 holds true.
455                // Thus, q1 = x_{k} and q3 = x_{3k+2}.
456                // =============
457                // Simply put: Length is 4k + 3
458                // q1 = x_k
459                // q2 = x_{2k+1}
460                // q3 = x_{3k+2}
461                let q1 = data.get_unchecked(k).0.to_f64()?;
462                let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
463                let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
464                (q1, q2, q3)
465            }
466        })
467    }
468}
469
470/// Compute quartiles using selection algorithm in O(n) time instead of O(n log n) sorting.
471/// This implementation follows Method 3 from `<https://en.wikipedia.org/wiki/Quartile>`
472fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
473where
474    T: PartialOrd + ToPrimitive,
475{
476    let len = data.len();
477
478    // Early return for small arrays
479    match len {
480        0..=2 => return None,
481        3 => {
482            // For 3 elements, we need to find the sorted order using selection
483            let min_val = quickselect(data, 0)?.to_f64()?;
484            let med_val = quickselect(data, 1)?.to_f64()?;
485            let max_val = quickselect(data, 2)?.to_f64()?;
486            return Some((min_val, med_val, max_val));
487        }
488        _ => {}
489    }
490
491    // Calculate k and remainder in one division
492    let k = len / 4;
493    let remainder = len % 4;
494
495    // Use selection algorithm to find the required order statistics
496    match remainder {
497        0 => {
498            // Length is multiple of 4 (4k)
499            // Q1 = (x_{k-1} + x_k) / 2
500            // Q2 = (x_{2k-1} + x_{2k}) / 2
501            // Q3 = (x_{3k-1} + x_{3k}) / 2
502            let q1_low = quickselect(data, k - 1)?.to_f64()?;
503            let q1_high = quickselect(data, k)?.to_f64()?;
504            let q1 = f64::midpoint(q1_low, q1_high);
505
506            let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
507            let q2_high = quickselect(data, 2 * k)?.to_f64()?;
508            let q2 = f64::midpoint(q2_low, q2_high);
509
510            let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
511            let q3_high = quickselect(data, 3 * k)?.to_f64()?;
512            let q3 = f64::midpoint(q3_low, q3_high);
513
514            Some((q1, q2, q3))
515        }
516        1 => {
517            // Length is 4k + 1
518            // Q1 = (x_{k-1} + x_k) / 2
519            // Q2 = x_{2k}
520            // Q3 = (x_{3k} + x_{3k+1}) / 2
521            let q1_low = quickselect(data, k - 1)?.to_f64()?;
522            let q1_high = quickselect(data, k)?.to_f64()?;
523            let q1 = f64::midpoint(q1_low, q1_high);
524
525            let q2 = quickselect(data, 2 * k)?.to_f64()?;
526
527            let q3_low = quickselect(data, 3 * k)?.to_f64()?;
528            let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
529            let q3 = f64::midpoint(q3_low, q3_high);
530
531            Some((q1, q2, q3))
532        }
533        2 => {
534            // Length is 4k + 2
535            // Q1 = x_k
536            // Q2 = (x_{2k} + x_{2k+1}) / 2
537            // Q3 = x_{3k+1}
538            let q1 = quickselect(data, k)?.to_f64()?;
539
540            let q2_low = quickselect(data, 2 * k)?.to_f64()?;
541            let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
542            let q2 = f64::midpoint(q2_low, q2_high);
543
544            let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
545
546            Some((q1, q2, q3))
547        }
548        _ => {
549            // Length is 4k + 3
550            // Q1 = x_k
551            // Q2 = x_{2k+1}
552            // Q3 = x_{3k+2}
553            let q1 = quickselect(data, k)?.to_f64()?;
554            let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
555            let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
556
557            Some((q1, q2, q3))
558        }
559    }
560}
561
562/// Zero-copy quartiles computation using index-based selection.
563/// This avoids copying data by working with an array of indices.
564fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
565where
566    T: PartialOrd + ToPrimitive,
567{
568    let len = data.len();
569
570    // Early return for small arrays
571    match len {
572        0..=2 => return None,
573        3 => {
574            // For 3 elements, create indices and find sorted order
575            let mut indices = vec![0, 1, 2];
576            let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
577            let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
578            let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
579            return Some((min_val, med_val, max_val));
580        }
581        _ => {}
582    }
583
584    // Calculate k and remainder in one division
585    let k = len / 4;
586    let remainder = len % 4;
587
588    // Allocate indices array once and reuse it by resetting after each call
589    let indices_template: Vec<usize> = (0..len).collect();
590    #[allow(unused)]
591    let mut indices: Vec<usize> = Vec::with_capacity(len);
592
593    // Use zero-copy selection algorithm to find the required order statistics
594    // Note: Each quickselect_by_index call mutates the indices array, so we need
595    // to reset it for each call to ensure correctness.
596    match remainder {
597        0 => {
598            // Length is multiple of 4 (4k)
599            indices.clone_from(&indices_template);
600            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
601            indices.clone_from(&indices_template);
602            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
603            let q1 = f64::midpoint(q1_low, q1_high);
604
605            indices.clone_from(&indices_template);
606            let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
607            indices.clone_from(&indices_template);
608            let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
609            let q2 = f64::midpoint(q2_low, q2_high);
610
611            indices.clone_from(&indices_template);
612            let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
613            indices.clone_from(&indices_template);
614            let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
615            let q3 = f64::midpoint(q3_low, q3_high);
616
617            Some((q1, q2, q3))
618        }
619        1 => {
620            // Length is 4k + 1
621            indices.clone_from(&indices_template);
622            let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
623            indices.clone_from(&indices_template);
624            let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
625            let q1 = f64::midpoint(q1_low, q1_high);
626
627            indices.clone_from(&indices_template);
628            let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
629
630            indices.clone_from(&indices_template);
631            let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
632            indices.clone_from(&indices_template);
633            let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
634            let q3 = f64::midpoint(q3_low, q3_high);
635
636            Some((q1, q2, q3))
637        }
638        2 => {
639            // Length is 4k + 2
640            indices.clone_from(&indices_template);
641            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
642
643            indices.clone_from(&indices_template);
644            let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
645            indices.clone_from(&indices_template);
646            let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
647            let q2 = f64::midpoint(q2_low, q2_high);
648
649            indices.clone_from(&indices_template);
650            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
651
652            Some((q1, q2, q3))
653        }
654        _ => {
655            // Length is 4k + 3
656            indices.clone_from(&indices_template);
657            let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
658            indices.clone_from(&indices_template);
659            let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
660            indices.clone_from(&indices_template);
661            let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
662
663            Some((q1, q2, q3))
664        }
665    }
666}
667
668fn mode_on_sorted<T, I>(it: I) -> Option<T>
669where
670    T: PartialOrd,
671    I: Iterator<Item = T>,
672{
673    use std::cmp::Ordering;
674
675    // This approach to computing the mode works very nicely when the
676    // number of samples is large and is close to its cardinality.
677    // In other cases, a hashmap would be much better.
678    // But really, how can we know this when given an arbitrary stream?
679    // Might just switch to a hashmap to track frequencies. That would also
680    // be generally useful for discovering the cardinality of a sample.
681    let (mut mode, mut next) = (None, None);
682    let (mut mode_count, mut next_count) = (0usize, 0usize);
683    for x in it {
684        if mode.as_ref() == Some(&x) {
685            mode_count += 1;
686        } else if next.as_ref() == Some(&x) {
687            next_count += 1;
688        } else {
689            next = Some(x);
690            next_count = 0;
691        }
692
693        match next_count.cmp(&mode_count) {
694            Ordering::Greater => {
695                mode = next;
696                mode_count = next_count;
697                next = None;
698                next_count = 0;
699            }
700            Ordering::Equal => {
701                mode = None;
702                mode_count = 0;
703            }
704            Ordering::Less => {}
705        }
706    }
707    mode
708}
709
710/// Computes both modes and antimodes from a sorted slice of values.
711/// This version works with references to avoid unnecessary cloning.
712///
713/// # Arguments
714///
715/// * `data` - A sorted slice of values
716///
717/// # Notes
718///
719/// - Mode is the most frequently occurring value(s)
720/// - Antimode is the least frequently occurring value(s)
721/// - Only returns up to 10 antimodes to avoid returning the full set when all values are unique
722/// - For empty slices, returns empty vectors and zero counts
723/// - For single value slices, returns that value as the mode and empty antimode
724/// - When all values occur exactly once, returns empty mode and up to 10 values as antimodes
725///
726/// # Returns
727///
728/// A tuple containing:
729/// * Modes information: `(Vec<T>, usize, u32)` where:
730///   - Vec<T>: Vector containing the mode values
731///   - usize: Number of modes found
732///   - u32: Frequency/count of the mode values
733/// * Antimodes information: `(Vec<T>, usize, u32)` where:
734///   - Vec<T>: Vector containing up to 10 antimode values
735///   - usize: Total number of antimodes
736///   - u32: Frequency/count of the antimode values
737#[allow(clippy::type_complexity)]
738#[inline]
739fn modes_and_antimodes_on_sorted_slice<T>(
740    data: &[Partial<T>],
741) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
742where
743    T: PartialOrd + Clone,
744{
745    let size = data.len();
746
747    // Early return for empty slice
748    if size == 0 {
749        return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
750    }
751
752    // Estimate capacity using integer square root of size
753    // Integer square root using binary search (faster than floating point sqrt)
754    let sqrt_size = if size == 0 {
755        0
756    } else {
757        let mut x = size;
758        let mut y = x.div_ceil(2);
759        while y < x {
760            x = y;
761            y = usize::midpoint(x, size / x);
762        }
763        x
764    };
765    let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
766
767    let mut current_value = &data[0].0;
768    let mut current_count = 1;
769    let mut highest_count = 1;
770    let mut lowest_count = u32::MAX;
771
772    // Count consecutive runs - optimized to reduce allocations
773    for x in data.iter().skip(1) {
774        if x.0 == *current_value {
775            current_count += 1;
776            highest_count = highest_count.max(current_count);
777        } else {
778            runs.push((current_value, current_count));
779            lowest_count = lowest_count.min(current_count);
780            current_value = &x.0;
781            current_count = 1;
782        }
783    }
784    runs.push((current_value, current_count));
785    lowest_count = lowest_count.min(current_count);
786
787    // Early return if only one unique value
788    if runs.len() == 1 {
789        let (val, count) = runs.pop().unwrap();
790        return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
791    }
792
793    // Special case: if all values appear exactly once
794    if highest_count == 1 {
795        let antimodes_count = runs.len().min(10);
796        let total_count = runs.len();
797        let mut antimodes = Vec::with_capacity(antimodes_count);
798        for (val, _) in runs.into_iter().take(antimodes_count) {
799            antimodes.push(val.clone());
800        }
801        // For modes: empty, count 0, occurrences 0 (not 1, 1)
802        return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
803    }
804
805    // Estimate capacities based on the number of runs
806    // For modes: typically 1-3 modes, rarely more than 10% of runs
807    // For antimodes: we only collect up to 10, but need to count all
808    let estimated_modes = (runs.len() / 10).clamp(1, 10);
809    let estimated_antimodes = 10.min(runs.len());
810
811    // Collect indices first to avoid unnecessary cloning
812    let mut modes_indices = Vec::with_capacity(estimated_modes);
813    let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
814    let mut mode_count = 0;
815    let mut antimodes_count = 0;
816    let mut antimodes_collected = 0_u32;
817
818    // Count and collect mode/antimode indices simultaneously
819    for (idx, (_, count)) in runs.iter().enumerate() {
820        if *count == highest_count {
821            modes_indices.push(idx);
822            mode_count += 1;
823        }
824        if *count == lowest_count {
825            antimodes_count += 1;
826            if antimodes_collected < 10 {
827                antimodes_indices.push(idx);
828                antimodes_collected += 1;
829            }
830        }
831    }
832
833    // Extract values only for the indices we need, cloning only at the end
834    let modes_result: Vec<T> = modes_indices
835        .into_iter()
836        .map(|idx| runs[idx].0.clone())
837        .collect();
838    let antimodes_result: Vec<T> = antimodes_indices
839        .into_iter()
840        .map(|idx| runs[idx].0.clone())
841        .collect();
842
843    (
844        (modes_result, mode_count, highest_count),
845        (antimodes_result, antimodes_count, lowest_count),
846    )
847}
848
849/// A commutative data structure for lazily sorted sequences of data.
850///
851/// The sort does not occur until statistics need to be computed.
852///
853/// Note that this works on types that do not define a total ordering like
854/// `f32` and `f64`. When an ordering is not defined, an arbitrary order
855/// is returned.
856#[allow(clippy::unsafe_derive_deserialize)]
857#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
858pub struct Unsorted<T> {
859    sorted: bool,
860    data: Vec<Partial<T>>,
861}
862
863impl<T: PartialOrd> Unsorted<T> {
864    /// Create initial empty state.
865    #[inline]
866    #[must_use]
867    pub fn new() -> Unsorted<T> {
868        Default::default()
869    }
870
871    /// Add a new element to the set.
872    #[allow(clippy::inline_always)]
873    #[inline(always)]
874    pub fn add(&mut self, v: T) {
875        self.sorted = false;
876        self.data.push(Partial(v));
877    }
878
879    /// Return the number of data points.
880    #[inline]
881    #[must_use]
882    pub const fn len(&self) -> usize {
883        self.data.len()
884    }
885
886    #[inline]
887    #[must_use]
888    pub const fn is_empty(&self) -> bool {
889        self.data.is_empty()
890    }
891
892    #[inline]
893    fn sort(&mut self) {
894        if !self.sorted {
895            // Use sequential sort for small datasets (< 10k elements) to avoid parallel overhead
896            if self.data.len() < PARALLEL_THRESHOLD {
897                self.data.sort_unstable();
898            } else {
899                self.data.par_sort_unstable();
900            }
901            self.sorted = true;
902        }
903    }
904
905    #[inline]
906    const fn already_sorted(&mut self) {
907        self.sorted = true;
908    }
909
910    /// Add multiple elements efficiently
911    #[inline]
912    pub fn add_bulk(&mut self, values: Vec<T>) {
913        self.sorted = false;
914        self.data.reserve(values.len());
915        self.data.extend(values.into_iter().map(Partial));
916    }
917
918    /// Shrink capacity to fit current data
919    #[inline]
920    pub fn shrink_to_fit(&mut self) {
921        self.data.shrink_to_fit();
922    }
923
924    /// Create with specific capacity
925    #[inline]
926    #[must_use]
927    pub fn with_capacity(capacity: usize) -> Self {
928        Unsorted {
929            sorted: true,
930            data: Vec::with_capacity(capacity),
931        }
932    }
933
934    /// Add a value assuming it's greater than all existing values
935    #[inline]
936    pub fn push_ascending(&mut self, value: T) {
937        if let Some(last) = self.data.last() {
938            debug_assert!(last.0 <= value, "Value must be >= than last element");
939        }
940        self.data.push(Partial(value));
941        // Data remains sorted
942    }
943}
944
945impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
946    #[inline]
947    /// Returns the cardinality of the data.
948    /// Set `sorted` to `true` if the data is already sorted.
949    /// Set `parallel_threshold` to `0` to force sequential processing.
950    /// Set `parallel_threshold` to `1` to use the default parallel threshold (`10_000`).
951    /// Set `parallel_threshold` to `2` to force parallel processing.
952    /// Set `parallel_threshold` to any other value to use a custom parallel threshold
953    /// greater than the default threshold of `10_000`.
954    pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
955        const CHUNK_SIZE: usize = 2048; // Process data in chunks of 2048 elements
956        const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; // multiple of 2048
957
958        let len = self.data.len();
959        match len {
960            0 => return 0,
961            1 => return 1,
962            _ => {}
963        }
964
965        if sorted {
966            self.already_sorted();
967        } else {
968            self.sort();
969        }
970
971        let use_parallel = parallel_threshold != 0
972            && (parallel_threshold == 1
973                || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
974
975        if use_parallel {
976            // Parallel processing using chunks
977            // Process chunks in parallel, returning (count, first_elem, last_elem) for each
978            type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
979            let chunk_info: ChunkInfo<'_, T> = self
980                .data
981                .par_chunks(CHUNK_SIZE)
982                .map(|chunk| {
983                    // Count unique elements within this chunk
984                    let mut count = u64::from(!chunk.is_empty());
985                    for window in chunk.windows(2) {
986                        // safety: windows(2) guarantees window has length 2
987                        if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
988                            count += 1;
989                        }
990                    }
991                    (count, chunk.first(), chunk.last())
992                })
993                .collect();
994
995            // Combine results, checking boundaries between chunks
996            let mut total = 0;
997            for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
998                total += count;
999
1000                // Check boundary with previous chunk
1001                if i > 0
1002                    && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1003                    && prev_last == curr_first
1004                {
1005                    total -= 1; // Deduct 1 if boundary values are equal
1006                }
1007            }
1008
1009            total
1010        } else {
1011            // Sequential processing
1012
1013            // the statement below is equivalent to:
1014            // let mut count = if self.data.is_empty() { 0 } else { 1 };
1015            let mut count = u64::from(!self.data.is_empty());
1016
1017            for window in self.data.windows(2) {
1018                // safety: windows(2) guarantees window has length 2
1019                if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1020                    count += 1;
1021                }
1022            }
1023            count
1024        }
1025    }
1026}
1027
1028impl<T: PartialOrd + Clone> Unsorted<T> {
1029    /// Returns the mode of the data.
1030    #[inline]
1031    pub fn mode(&mut self) -> Option<T> {
1032        if self.data.is_empty() {
1033            return None;
1034        }
1035        self.sort();
1036        mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1037    }
1038
1039    /// Returns the modes of the data.
1040    /// Note that there is also a `frequency::mode()` function that return one mode
1041    /// with the highest frequency. If there is a tie, it returns None.
1042    #[inline]
1043    fn modes(&mut self) -> (Vec<T>, usize, u32) {
1044        if self.data.is_empty() {
1045            return (Vec::new(), 0, 0);
1046        }
1047        self.sort();
1048        modes_and_antimodes_on_sorted_slice(&self.data).0
1049    }
1050
1051    /// Returns the antimodes of the data.
1052    /// `antimodes_result` only returns the first 10 antimodes
1053    #[inline]
1054    fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1055        if self.data.is_empty() {
1056            return (Vec::new(), 0, 0);
1057        }
1058        self.sort();
1059        modes_and_antimodes_on_sorted_slice(&self.data).1
1060    }
1061
1062    /// Returns the modes and antimodes of the data.
1063    /// `antimodes_result` only returns the first 10 antimodes
1064    #[allow(clippy::type_complexity)]
1065    #[inline]
1066    pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1067        if self.data.is_empty() {
1068            return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1069        }
1070        self.sort();
1071        modes_and_antimodes_on_sorted_slice(&self.data)
1072    }
1073}
1074
1075impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1076    /// Returns the median of the data.
1077    #[inline]
1078    pub fn median(&mut self) -> Option<f64> {
1079        if self.data.is_empty() {
1080            return None;
1081        }
1082        self.sort();
1083        median_on_sorted(&self.data)
1084    }
1085}
1086
1087impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1088    /// Returns the Median Absolute Deviation (MAD) of the data.
1089    #[inline]
1090    pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1091        if self.data.is_empty() {
1092            return None;
1093        }
1094        if existing_median.is_none() {
1095            self.sort();
1096        }
1097        mad_on_sorted(&self.data, existing_median)
1098    }
1099}
1100
1101impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1102    /// Returns the quartiles of the data using the traditional sorting approach.
1103    ///
1104    /// This method sorts the data first and then computes quartiles.
1105    /// Time complexity: O(n log n)
1106    #[inline]
1107    pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1108        if self.data.is_empty() {
1109            return None;
1110        }
1111        self.sort();
1112        quartiles_on_sorted(&self.data)
1113    }
1114}
1115
1116impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1117    /// Returns the quartiles of the data using selection algorithm.
1118    ///
1119    /// This implementation uses a selection algorithm (quickselect) to find quartiles
1120    /// in O(n) average time complexity instead of O(n log n) sorting.
1121    /// Requires T to implement Clone to create a working copy of the data.
1122    ///
1123    /// **Performance Note**: While theoretically O(n) vs O(n log n), this implementation
1124    /// is often slower than the sorting-based approach for small to medium datasets due to:
1125    /// - Need to find multiple order statistics (3 separate quickselect calls)
1126    /// - Overhead of copying data to avoid mutation
1127    /// - Rayon's highly optimized parallel sorting
1128    #[inline]
1129    pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1130        if self.data.is_empty() {
1131            return None;
1132        }
1133        // If data is already sorted, use zero-copy approach to avoid cloning
1134        if self.sorted {
1135            return quartiles_with_zero_copy_selection(&self.data);
1136        }
1137        // Create a copy using collect to avoid mutating the original for selection algorithm
1138        let mut data_copy: Vec<Partial<T>> =
1139            self.data.iter().map(|x| Partial(x.0.clone())).collect();
1140        quartiles_with_selection(&mut data_copy)
1141    }
1142}
1143
1144impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1145    /// Returns the quartiles using zero-copy selection algorithm.
1146    ///
1147    /// This implementation avoids copying data by working with indices instead,
1148    /// providing better performance than the clone-based selection approach.
1149    /// The algorithm is O(n) average time and only allocates a vector of indices (usize).
1150    #[inline]
1151    #[must_use]
1152    pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1153        if self.data.is_empty() {
1154            return None;
1155        }
1156        quartiles_with_zero_copy_selection(&self.data)
1157    }
1158}
1159
1160impl<T: PartialOrd> Commute for Unsorted<T> {
1161    #[inline]
1162    fn merge(&mut self, mut v: Unsorted<T>) {
1163        if v.is_empty() {
1164            return;
1165        }
1166
1167        self.sorted = false;
1168        // we use std::mem::take to avoid unnecessary allocations
1169        self.data.extend(std::mem::take(&mut v.data));
1170    }
1171}
1172
1173impl<T: PartialOrd> Default for Unsorted<T> {
1174    #[inline]
1175    fn default() -> Unsorted<T> {
1176        Unsorted {
1177            data: Vec::with_capacity(10_000),
1178            sorted: true, // empty is sorted
1179        }
1180    }
1181}
1182
1183impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1184    #[inline]
1185    fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1186        let mut v = Unsorted::new();
1187        v.extend(it);
1188        v
1189    }
1190}
1191
1192impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1193    #[inline]
1194    fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1195        self.sorted = false;
1196        self.data.extend(it.into_iter().map(Partial));
1197    }
1198}
1199
1200fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1201where
1202    T: PartialOrd + Clone,
1203{
1204    let len = data.len();
1205
1206    // Early return for empty array or invalid percentiles
1207    if len == 0 || percentiles.iter().any(|&p| p > 100) {
1208        return None;
1209    }
1210
1211    // Optimize: Check if percentiles are already sorted and unique
1212    let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1213        // Single or empty percentile - no need to sort/dedup
1214        percentiles.to_vec()
1215    } else {
1216        // Check if already sorted and unique (common case)
1217        let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1218
1219        if is_sorted_unique {
1220            // Already sorted and unique, use directly without cloning
1221            percentiles.to_vec()
1222        } else {
1223            // Need to sort and dedup - use HashSet for efficient deduplication
1224            use std::collections::HashSet;
1225            let mut seen = HashSet::with_capacity(percentiles.len().min(100));
1226            let mut sorted_unique = Vec::with_capacity(percentiles.len());
1227            for &p in percentiles {
1228                if seen.insert(p) {
1229                    sorted_unique.push(p);
1230                }
1231            }
1232            sorted_unique.sort_unstable();
1233            sorted_unique
1234        }
1235    };
1236
1237    let mut results = Vec::with_capacity(unique_percentiles.len());
1238
1239    // SAFETY: All index calculations below are guaranteed to be in bounds
1240    // because we've verified len > 0 and the rank calculation ensures
1241    // the index is within bounds
1242    unsafe {
1243        for &p in &unique_percentiles {
1244            // Calculate the ordinal rank using nearest-rank method
1245            // see https://en.wikipedia.org/wiki/Percentile#The_nearest-rank_method
1246            // n = ⌈(P/100) × N⌉
1247            #[allow(clippy::cast_sign_loss)]
1248            let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1249
1250            // Convert to 0-based index
1251            let idx = rank.saturating_sub(1);
1252
1253            // Get the value at that rank and extract the inner value
1254            results.push(data.get_unchecked(idx).0.clone());
1255        }
1256    }
1257
1258    Some(results)
1259}
1260
1261impl<T: PartialOrd + Clone> Unsorted<T> {
1262    /// Returns the requested percentiles of the data.
1263    ///
1264    /// Uses the nearest-rank method to compute percentiles.
1265    /// Each returned value is an actual value from the dataset.
1266    ///
1267    /// # Arguments
1268    /// * `percentiles` - A slice of u8 values representing percentiles to compute (0-100)
1269    ///
1270    /// # Returns
1271    /// * `None` if the data is empty or if any percentile is > 100
1272    /// * `Some(Vec<T>)` containing percentile values in the same order as requested
1273    ///
1274    /// # Example
1275    /// ```
1276    /// use stats::Unsorted;
1277    /// let mut data = Unsorted::new();
1278    /// data.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
1279    /// let percentiles = vec![25, 50, 75];
1280    /// let results = data.custom_percentiles(&percentiles).unwrap();
1281    /// assert_eq!(results, vec![3, 5, 8]);
1282    /// ```
1283    #[inline]
1284    pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1285        if self.data.is_empty() {
1286            return None;
1287        }
1288        self.sort();
1289        custom_percentiles_on_sorted(&self.data, percentiles)
1290    }
1291}
1292
1293#[cfg(test)]
1294mod test {
1295    use super::*;
1296
1297    #[test]
1298    fn test_cardinality_empty() {
1299        let mut unsorted: Unsorted<i32> = Unsorted::new();
1300        assert_eq!(unsorted.cardinality(false, 1), 0);
1301    }
1302
1303    #[test]
1304    fn test_cardinality_single_element() {
1305        let mut unsorted = Unsorted::new();
1306        unsorted.add(5);
1307        assert_eq!(unsorted.cardinality(false, 1), 1);
1308    }
1309
1310    #[test]
1311    fn test_cardinality_unique_elements() {
1312        let mut unsorted = Unsorted::new();
1313        unsorted.extend(vec![1, 2, 3, 4, 5]);
1314        assert_eq!(unsorted.cardinality(false, 1), 5);
1315    }
1316
1317    #[test]
1318    fn test_cardinality_duplicate_elements() {
1319        let mut unsorted = Unsorted::new();
1320        unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1321        assert_eq!(unsorted.cardinality(false, 1), 4);
1322    }
1323
1324    #[test]
1325    fn test_cardinality_all_same() {
1326        let mut unsorted = Unsorted::new();
1327        unsorted.extend(vec![1; 100]);
1328        assert_eq!(unsorted.cardinality(false, 1), 1);
1329    }
1330
1331    #[test]
1332    fn test_cardinality_large_range() {
1333        let mut unsorted = Unsorted::new();
1334        unsorted.extend(0..1_000_000);
1335        assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1336    }
1337
1338    #[test]
1339    fn test_cardinality_large_range_sequential() {
1340        let mut unsorted = Unsorted::new();
1341        unsorted.extend(0..1_000_000);
1342        assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1343    }
1344
1345    #[test]
1346    fn test_cardinality_presorted() {
1347        let mut unsorted = Unsorted::new();
1348        unsorted.extend(vec![1, 2, 3, 4, 5]);
1349        unsorted.sort();
1350        assert_eq!(unsorted.cardinality(true, 1), 5);
1351    }
1352
1353    #[test]
1354    fn test_cardinality_float() {
1355        let mut unsorted = Unsorted::new();
1356        unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1357        assert_eq!(unsorted.cardinality(false, 1), 4);
1358    }
1359
1360    #[test]
1361    fn test_cardinality_string() {
1362        let mut unsorted = Unsorted::new();
1363        unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1364        assert_eq!(unsorted.cardinality(false, 1), 3);
1365    }
1366
1367    #[test]
1368    fn test_quartiles_selection_vs_sorted() {
1369        // Test that selection-based quartiles gives same results as sorting-based
1370        let test_cases = vec![
1371            vec![3, 5, 7, 9],
1372            vec![3, 5, 7],
1373            vec![1, 2, 7, 11],
1374            vec![3, 5, 7, 9, 12],
1375            vec![2, 2, 3, 8, 10],
1376            vec![3, 5, 7, 9, 12, 20],
1377            vec![0, 2, 4, 8, 10, 11],
1378            vec![3, 5, 7, 9, 12, 20, 21],
1379            vec![1, 5, 6, 6, 7, 10, 19],
1380        ];
1381
1382        for test_case in test_cases {
1383            let mut unsorted1 = Unsorted::new();
1384            let mut unsorted2 = Unsorted::new();
1385            let mut unsorted3 = Unsorted::new();
1386            unsorted1.extend(test_case.clone());
1387            unsorted2.extend(test_case.clone());
1388            unsorted3.extend(test_case.clone());
1389
1390            let result_sorted = unsorted1.quartiles();
1391            let result_selection = unsorted2.quartiles_with_selection();
1392            let result_zero_copy = unsorted3.quartiles_zero_copy();
1393
1394            assert_eq!(
1395                result_sorted, result_selection,
1396                "Selection mismatch for test case: {:?}",
1397                test_case
1398            );
1399            assert_eq!(
1400                result_sorted, result_zero_copy,
1401                "Zero-copy mismatch for test case: {:?}",
1402                test_case
1403            );
1404        }
1405    }
1406
1407    #[test]
1408    fn test_quartiles_with_selection_small() {
1409        // Test edge cases for selection-based quartiles
1410        let mut unsorted: Unsorted<i32> = Unsorted::new();
1411        assert_eq!(unsorted.quartiles_with_selection(), None);
1412
1413        let mut unsorted = Unsorted::new();
1414        unsorted.extend(vec![1, 2]);
1415        assert_eq!(unsorted.quartiles_with_selection(), None);
1416
1417        let mut unsorted = Unsorted::new();
1418        unsorted.extend(vec![1, 2, 3]);
1419        assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1420    }
1421
1422    #[test]
1423    fn test_quickselect() {
1424        let data = vec![
1425            Partial(3),
1426            Partial(1),
1427            Partial(4),
1428            Partial(1),
1429            Partial(5),
1430            Partial(9),
1431            Partial(2),
1432            Partial(6),
1433        ];
1434
1435        // Test finding different positions
1436        assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1437        assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1438        assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1439
1440        // Test edge cases
1441        let mut empty: Vec<Partial<i32>> = vec![];
1442        assert_eq!(quickselect(&mut empty, 0), None);
1443
1444        let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1445        assert_eq!(quickselect(&mut data, 10), None); // k >= len
1446    }
1447
1448    #[test]
1449    fn median_stream() {
1450        assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1451        assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1452    }
1453
1454    #[test]
1455    fn mad_stream() {
1456        assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1457        assert_eq!(
1458            mad(
1459                vec![
1460                    86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1461                    50, 61, 39, 88, 5, 13, 64
1462                ]
1463                .into_iter(),
1464                None
1465            ),
1466            Some(16.0)
1467        );
1468    }
1469
1470    #[test]
1471    fn mad_stream_precalc_median() {
1472        let data = vec![3usize, 5, 7, 9].into_iter();
1473        let median1 = median(data.clone());
1474        assert_eq!(mad(data, median1), Some(2.0));
1475
1476        let data2 = vec![
1477            86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1478            39, 88, 5, 13, 64,
1479        ]
1480        .into_iter();
1481        let median2 = median(data2.clone());
1482        assert_eq!(mad(data2, median2), Some(16.0));
1483    }
1484
1485    #[test]
1486    fn mode_stream() {
1487        assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1488        assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1489        assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1490        assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1491        assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1492    }
1493
1494    #[test]
1495    fn median_floats() {
1496        assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1497        assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1498    }
1499
1500    #[test]
1501    fn mode_floats() {
1502        assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1503        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1504        assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1505        assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1506        assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1507    }
1508
1509    #[test]
1510    fn modes_stream() {
1511        assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1512        assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1513        assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1514        assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1515        assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1516        let vec: Vec<u32> = vec![];
1517        assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1518    }
1519
1520    #[test]
1521    fn modes_floats() {
1522        assert_eq!(
1523            modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1524            (vec![], 0, 0)
1525        );
1526        assert_eq!(
1527            modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1528            (vec![3.0], 1, 4)
1529        );
1530        assert_eq!(
1531            modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1532            (vec![3.0, 4.0], 2, 2)
1533        );
1534        assert_eq!(
1535            modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1536            (vec![1.0, 3.0], 2, 2)
1537        );
1538    }
1539
1540    #[test]
1541    fn antimodes_stream() {
1542        assert_eq!(
1543            antimodes(vec![3usize, 5, 7, 9].into_iter()),
1544            (vec![3, 5, 7, 9], 4, 1)
1545        );
1546        assert_eq!(
1547            antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1548            (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1549        );
1550        assert_eq!(
1551            antimodes(vec![1usize, 3, 3, 3].into_iter()),
1552            (vec![1], 1, 1)
1553        );
1554        assert_eq!(
1555            antimodes(vec![3usize, 3, 4, 4].into_iter()),
1556            (vec![3, 4], 2, 2)
1557        );
1558        assert_eq!(
1559            antimodes(
1560                vec![
1561                    3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1562                    14, 14, 15, 15
1563                ]
1564                .into_iter()
1565            ),
1566            // we only show the first 10 of the 13 antimodes
1567            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1568        );
1569        assert_eq!(
1570            antimodes(
1571                vec![
1572                    3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1573                    14, 14, 15, 15
1574                ]
1575                .into_iter()
1576            ),
1577            (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1578        );
1579        assert_eq!(
1580            antimodes(vec![3usize, 3, 3, 4].into_iter()),
1581            (vec![4], 1, 1)
1582        );
1583        assert_eq!(
1584            antimodes(vec![4usize, 3, 3, 3].into_iter()),
1585            (vec![4], 1, 1)
1586        );
1587        assert_eq!(
1588            antimodes(vec![1usize, 1, 2, 2].into_iter()),
1589            (vec![1, 2], 2, 2)
1590        );
1591        let vec: Vec<u32> = vec![];
1592        assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1593    }
1594
1595    #[test]
1596    fn antimodes_floats() {
1597        assert_eq!(
1598            antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1599            (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1600        );
1601        assert_eq!(
1602            antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1603            (vec![], 0, 0)
1604        );
1605        assert_eq!(
1606            antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1607            (vec![3.0, 4.0], 2, 2)
1608        );
1609        assert_eq!(
1610            antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1611            (vec![2.0], 1, 1)
1612        );
1613    }
1614
1615    #[test]
1616    fn test_custom_percentiles() {
1617        // Test with integers
1618        let mut unsorted: Unsorted<i32> = Unsorted::new();
1619        unsorted.extend(1..=11); // [1,2,3,4,5,6,7,8,9,10,11]
1620
1621        let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
1622        assert_eq!(result, vec![3, 6, 9]);
1623
1624        // Test with strings
1625        let mut str_data = Unsorted::new();
1626        str_data.extend(vec!["a", "b", "c", "d", "e"]);
1627        let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
1628        assert_eq!(result, vec!["a", "b", "c", "d"]);
1629
1630        // Test with chars
1631        let mut char_data = Unsorted::new();
1632        char_data.extend('a'..='e');
1633        let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
1634        assert_eq!(result, vec!['b', 'c', 'd']);
1635
1636        // Test with floats
1637        let mut float_data = Unsorted::new();
1638        float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
1639        let result = float_data
1640            .custom_percentiles(&[10, 30, 50, 70, 90])
1641            .unwrap();
1642        assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
1643
1644        // Test with empty percentiles array
1645        let result = float_data.custom_percentiles(&[]).unwrap();
1646        assert_eq!(result, Vec::<f64>::new());
1647
1648        // Test with duplicate percentiles
1649        let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
1650        assert_eq!(result, vec![5.5]);
1651
1652        // Test with extreme percentiles
1653        let result = float_data.custom_percentiles(&[0, 100]).unwrap();
1654        assert_eq!(result, vec![1.1, 9.9]);
1655
1656        // Test with unsorted percentiles
1657        let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
1658        assert_eq!(result, vec![3.3, 5.5, 7.7]); // results always sorted
1659
1660        // Test with single element
1661        let mut single = Unsorted::new();
1662        single.add(42);
1663        let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
1664        assert_eq!(result, vec![42, 42, 42]);
1665    }
1666
1667    #[test]
1668    fn quartiles_stream() {
1669        assert_eq!(
1670            quartiles(vec![3usize, 5, 7].into_iter()),
1671            Some((3., 5., 7.))
1672        );
1673        assert_eq!(
1674            quartiles(vec![3usize, 5, 7, 9].into_iter()),
1675            Some((4., 6., 8.))
1676        );
1677        assert_eq!(
1678            quartiles(vec![1usize, 2, 7, 11].into_iter()),
1679            Some((1.5, 4.5, 9.))
1680        );
1681        assert_eq!(
1682            quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
1683            Some((4., 7., 10.5))
1684        );
1685        assert_eq!(
1686            quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
1687            Some((2., 3., 9.))
1688        );
1689        assert_eq!(
1690            quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
1691            Some((5., 8., 12.))
1692        );
1693        assert_eq!(
1694            quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
1695            Some((2., 6., 10.))
1696        );
1697        assert_eq!(
1698            quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
1699            Some((5., 9., 20.))
1700        );
1701        assert_eq!(
1702            quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
1703            Some((5., 6., 10.))
1704        );
1705    }
1706
1707    #[test]
1708    fn quartiles_floats() {
1709        assert_eq!(
1710            quartiles(vec![3_f64, 5., 7.].into_iter()),
1711            Some((3., 5., 7.))
1712        );
1713        assert_eq!(
1714            quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
1715            Some((4., 6., 8.))
1716        );
1717        assert_eq!(
1718            quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
1719            Some((4., 7., 10.5))
1720        );
1721        assert_eq!(
1722            quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
1723            Some((5., 8., 12.))
1724        );
1725        assert_eq!(
1726            quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
1727            Some((5., 9., 20.))
1728        );
1729    }
1730
1731    #[test]
1732    fn test_quartiles_zero_copy_small() {
1733        // Test edge cases for zero-copy quartiles
1734        let unsorted: Unsorted<i32> = Unsorted::new();
1735        assert_eq!(unsorted.quartiles_zero_copy(), None);
1736
1737        let mut unsorted = Unsorted::new();
1738        unsorted.extend(vec![1, 2]);
1739        assert_eq!(unsorted.quartiles_zero_copy(), None);
1740
1741        let mut unsorted = Unsorted::new();
1742        unsorted.extend(vec![1, 2, 3]);
1743        assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
1744
1745        // Test larger case
1746        let mut unsorted = Unsorted::new();
1747        unsorted.extend(vec![3, 5, 7, 9]);
1748        assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
1749    }
1750}
1751
1752#[cfg(test)]
1753mod bench {
1754    use super::*;
1755    use std::time::Instant;
1756
1757    #[test]
1758    #[ignore] // Run with `cargo test comprehensive_quartiles_benchmark -- --ignored --nocapture` to see performance comparison
1759    fn comprehensive_quartiles_benchmark() {
1760        // Test a wide range of data sizes
1761        let data_sizes = vec![
1762            1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
1763        ];
1764
1765        println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
1766
1767        for size in data_sizes {
1768            println!("--- Testing with {} elements ---", size);
1769
1770            // Test different data patterns
1771            let test_patterns = vec![
1772                ("Random", generate_random_data(size)),
1773                ("Reverse Sorted", {
1774                    let mut v = Vec::with_capacity(size);
1775                    for x in (0..size).rev() {
1776                        v.push(x as i32);
1777                    }
1778                    v
1779                }),
1780                ("Already Sorted", {
1781                    let mut v = Vec::with_capacity(size);
1782                    for x in 0..size {
1783                        v.push(x as i32);
1784                    }
1785                    v
1786                }),
1787                ("Many Duplicates", {
1788                    // Create a vector with just a few distinct values repeated many times
1789                    let mut v = Vec::with_capacity(size);
1790                    let chunk_size = size / 100;
1791                    for i in 0..100 {
1792                        v.extend(std::iter::repeat_n(i, chunk_size));
1793                    }
1794                    // Add any remaining elements
1795                    v.extend(std::iter::repeat_n(0, size - v.len()));
1796                    v
1797                }),
1798            ];
1799
1800            for (pattern_name, test_data) in test_patterns {
1801                println!("\n  Pattern: {}", pattern_name);
1802
1803                // Benchmark sorting-based approach
1804                let mut unsorted1 = Unsorted::new();
1805                unsorted1.extend(test_data.clone());
1806
1807                let start = Instant::now();
1808                let result_sorted = unsorted1.quartiles();
1809                let sorted_time = start.elapsed();
1810
1811                // Benchmark selection-based approach (with copying)
1812                let mut unsorted2 = Unsorted::new();
1813                unsorted2.extend(test_data.clone());
1814
1815                let start = Instant::now();
1816                let result_selection = unsorted2.quartiles_with_selection();
1817                let selection_time = start.elapsed();
1818
1819                // Benchmark zero-copy selection-based approach
1820                let mut unsorted3 = Unsorted::new();
1821                unsorted3.extend(test_data);
1822
1823                let start = Instant::now();
1824                let result_zero_copy = unsorted3.quartiles_zero_copy();
1825                let zero_copy_time = start.elapsed();
1826
1827                // Verify results are the same
1828                assert_eq!(result_sorted, result_selection);
1829                assert_eq!(result_sorted, result_zero_copy);
1830
1831                let selection_speedup =
1832                    sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
1833                let zero_copy_speedup =
1834                    sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
1835
1836                println!("    Sorting:       {:>12?}", sorted_time);
1837                println!(
1838                    "    Selection:     {:>12?} (speedup: {:.2}x)",
1839                    selection_time, selection_speedup
1840                );
1841                println!(
1842                    "    Zero-copy:     {:>12?} (speedup: {:.2}x)",
1843                    zero_copy_time, zero_copy_speedup
1844                );
1845
1846                let best_algorithm =
1847                    if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
1848                        "ZERO-COPY"
1849                    } else if selection_speedup > 1.0 {
1850                        "SELECTION"
1851                    } else {
1852                        "SORTING"
1853                    };
1854                println!("    Best: {}", best_algorithm);
1855            }
1856
1857            println!(); // Add blank line between sizes
1858        }
1859    }
1860
1861    // Generate random data for benchmarking
1862    fn generate_random_data(size: usize) -> Vec<i32> {
1863        // Simple LCG random number generator for reproducible results
1864        let mut rng = 1234567u64;
1865        let mut vec = Vec::with_capacity(size);
1866        for _ in 0..size {
1867            rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
1868            vec.push((rng >> 16) as i32);
1869        }
1870        vec
1871    }
1872
1873    #[test]
1874    #[ignore] // Run with `cargo test find_selection_threshold -- --ignored --nocapture` to find exact threshold
1875    fn find_selection_threshold() {
1876        println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
1877
1878        // Binary search approach to find the threshold
1879        let mut found_threshold = None;
1880        let test_sizes = vec![
1881            1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
1882            15_000_000, 20_000_000, 25_000_000, 30_000_000,
1883        ];
1884
1885        for size in test_sizes {
1886            println!("Testing size: {}", size);
1887
1888            // Use random data as it's most representative of real-world scenarios
1889            let test_data = generate_random_data(size);
1890
1891            // Run multiple iterations to get average performance
1892            let iterations = 3;
1893            let mut sorting_total = 0u128;
1894            let mut selection_total = 0u128;
1895            let mut zero_copy_total = 0u128;
1896
1897            for i in 0..iterations {
1898                println!("  Iteration {}/{}", i + 1, iterations);
1899
1900                // Sorting approach
1901                let mut unsorted1 = Unsorted::new();
1902                unsorted1.extend(test_data.clone());
1903
1904                let start = Instant::now();
1905                let _result_sorted = unsorted1.quartiles();
1906                sorting_total += start.elapsed().as_nanos();
1907
1908                // Selection approach (with copying)
1909                let mut unsorted2 = Unsorted::new();
1910                unsorted2.extend(test_data.clone());
1911
1912                let start = Instant::now();
1913                let _result_selection = unsorted2.quartiles_with_selection();
1914                selection_total += start.elapsed().as_nanos();
1915
1916                // Zero-copy selection approach
1917                let mut unsorted3 = Unsorted::new();
1918                unsorted3.extend(test_data.clone());
1919
1920                let start = Instant::now();
1921                let _result_zero_copy = unsorted3.quartiles_zero_copy();
1922                zero_copy_total += start.elapsed().as_nanos();
1923            }
1924
1925            let avg_sorting = sorting_total / iterations as u128;
1926            let avg_selection = selection_total / iterations as u128;
1927            let avg_zero_copy = zero_copy_total / iterations as u128;
1928            let selection_speedup = avg_sorting as f64 / avg_selection as f64;
1929            let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
1930
1931            println!(
1932                "  Average sorting:    {:>12.2}ms",
1933                avg_sorting as f64 / 1_000_000.0
1934            );
1935            println!(
1936                "  Average selection:  {:>12.2}ms (speedup: {:.2}x)",
1937                avg_selection as f64 / 1_000_000.0,
1938                selection_speedup
1939            );
1940            println!(
1941                "  Average zero-copy:  {:>12.2}ms (speedup: {:.2}x)",
1942                avg_zero_copy as f64 / 1_000_000.0,
1943                zero_copy_speedup
1944            );
1945
1946            if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
1947                found_threshold = Some(size);
1948                let best_method = if zero_copy_speedup > selection_speedup {
1949                    "Zero-copy"
1950                } else {
1951                    "Selection"
1952                };
1953                println!(
1954                    "  *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
1955                    best_method, size
1956                );
1957            }
1958
1959            println!();
1960        }
1961
1962        match found_threshold {
1963            Some(threshold) => println!(
1964                "🎯 Selection algorithm becomes faster at approximately {} elements",
1965                threshold
1966            ),
1967            None => println!("❌ Selection algorithm did not become faster in the tested range"),
1968        }
1969    }
1970
1971    #[test]
1972    #[ignore] // Run with `cargo test benchmark_different_data_types -- --ignored --nocapture` to test different data types
1973    fn benchmark_different_data_types() {
1974        println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
1975
1976        let size = 5_000_000; // Use a large size where differences might be visible
1977
1978        // Test with f64 (floating point)
1979        println!("Testing with f64 data:");
1980        let float_data: Vec<f64> = generate_random_data(size)
1981            .into_iter()
1982            .map(|x| x as f64 / 1000.0)
1983            .collect();
1984
1985        let mut unsorted1 = Unsorted::new();
1986        unsorted1.extend(float_data.clone());
1987        let start = Instant::now();
1988        let _result = unsorted1.quartiles();
1989        let sorting_time = start.elapsed();
1990
1991        let mut unsorted2 = Unsorted::new();
1992        unsorted2.extend(float_data.clone());
1993        let start = Instant::now();
1994        let _result = unsorted2.quartiles_with_selection();
1995        let selection_time = start.elapsed();
1996
1997        let mut unsorted3 = Unsorted::new();
1998        unsorted3.extend(float_data);
1999        let start = Instant::now();
2000        let _result = unsorted3.quartiles_zero_copy();
2001        let zero_copy_time = start.elapsed();
2002
2003        println!("  Sorting:    {:?}", sorting_time);
2004        println!("  Selection:  {:?}", selection_time);
2005        println!("  Zero-copy:  {:?}", zero_copy_time);
2006        println!(
2007            "  Selection Speedup:  {:.2}x",
2008            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2009        );
2010        println!(
2011            "  Zero-copy Speedup:  {:.2}x\n",
2012            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2013        );
2014
2015        // Test with i64 (larger integers)
2016        println!("Testing with i64 data:");
2017        let int64_data: Vec<i64> = generate_random_data(size)
2018            .into_iter()
2019            .map(|x| x as i64 * 1000)
2020            .collect();
2021
2022        let mut unsorted1 = Unsorted::new();
2023        unsorted1.extend(int64_data.clone());
2024        let start = Instant::now();
2025        let _result = unsorted1.quartiles();
2026        let sorting_time = start.elapsed();
2027
2028        let mut unsorted2 = Unsorted::new();
2029        unsorted2.extend(int64_data.clone());
2030        let start = Instant::now();
2031        let _result = unsorted2.quartiles_with_selection();
2032        let selection_time = start.elapsed();
2033
2034        let mut unsorted3 = Unsorted::new();
2035        unsorted3.extend(int64_data);
2036        let start = Instant::now();
2037        let _result = unsorted3.quartiles_zero_copy();
2038        let zero_copy_time = start.elapsed();
2039
2040        println!("  Sorting:    {:?}", sorting_time);
2041        println!("  Selection:  {:?}", selection_time);
2042        println!("  Zero-copy:  {:?}", zero_copy_time);
2043        println!(
2044            "  Selection Speedup:  {:.2}x",
2045            sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2046        );
2047        println!(
2048            "  Zero-copy Speedup:  {:.2}x",
2049            sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2050        );
2051    }
2052}
stats/unsorted.rs

stats/
unsorted.rs