1use num_traits::ToPrimitive;
2use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12pub fn median<I>(it: I) -> Option<f64>
16where
17 I: Iterator,
18 <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20 it.collect::<Unsorted<_>>().median()
21}
22
23pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26 I: Iterator,
27 <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29 it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37 I: Iterator,
38 <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40 it.collect::<Unsorted<_>>().quartiles()
41}
42
43pub fn mode<T, I>(it: I) -> Option<T>
49where
50 T: PartialOrd + Clone,
51 I: Iterator<Item = T>,
52{
53 it.collect::<Unsorted<T>>().mode()
54}
55
56pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75 T: PartialOrd + Clone,
76 I: Iterator<Item = T>,
77{
78 it.collect::<Unsorted<T>>().modes()
79}
80
81pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105 T: PartialOrd + Clone,
106 I: Iterator<Item = T>,
107{
108 let (antimodes_result, antimodes_count, antimodes_occurrences) =
109 it.collect::<Unsorted<T>>().antimodes();
110 (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113fn median_on_sorted<T>(data: &[T]) -> Option<f64>
114where
115 T: PartialOrd + ToPrimitive,
116{
117 Some(match data.len() {
118 0 => return None,
120 1 => data.first()?.to_f64()?,
122 len if len.is_multiple_of(2) => {
124 let idx = len / 2;
125 let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
128 let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
129 f64::midpoint(v1, v2)
130 }
131 len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
134 })
135}
136
137fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
138where
139 T: Sync + PartialOrd + ToPrimitive,
140{
141 if data.is_empty() {
142 return None;
143 }
144 let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
145
146 let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
148 let mut vec = Vec::with_capacity(data.len());
150 for x in data {
151 vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
152 }
153 vec
154 } else {
155 data.par_iter()
157 .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
158 .collect()
159 };
160
161 if abs_diff_vec.len() < PARALLEL_THRESHOLD {
163 abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
164 } else {
165 abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
166 }
167 median_on_sorted(&abs_diff_vec)
168}
169
170fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
173where
174 T: PartialOrd,
175{
176 if data.is_empty() || k >= data.len() {
177 return None;
178 }
179
180 let mut left = 0;
181 let mut right = data.len() - 1;
182
183 loop {
184 if left == right {
185 return Some(&data[left].0);
186 }
187
188 let pivot_idx = median_of_three_pivot(data, left, right);
190 let pivot_idx = partition(data, left, right, pivot_idx);
191
192 match k.cmp(&pivot_idx) {
193 std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
194 std::cmp::Ordering::Less => right = pivot_idx - 1,
195 std::cmp::Ordering::Greater => left = pivot_idx + 1,
196 }
197 }
198}
199
200fn quickselect_by_index<'a, T>(
203 data: &'a [Partial<T>],
204 indices: &mut [usize],
205 k: usize,
206) -> Option<&'a T>
207where
208 T: PartialOrd,
209{
210 if data.is_empty() || indices.is_empty() || k >= indices.len() {
211 return None;
212 }
213
214 let mut left = 0;
215 let mut right = indices.len() - 1;
216
217 loop {
218 if left == right {
219 return Some(&data[indices[left]].0);
220 }
221
222 let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
224 let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
225
226 match k.cmp(&pivot_idx) {
227 std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
228 std::cmp::Ordering::Less => right = pivot_idx - 1,
229 std::cmp::Ordering::Greater => left = pivot_idx + 1,
230 }
231 }
232}
233
234fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
236where
237 T: PartialOrd,
238{
239 let mid = left + (right - left) / 2;
240
241 if data[left] <= data[mid] {
242 if data[mid] <= data[right] {
243 mid
244 } else if data[left] <= data[right] {
245 right
246 } else {
247 left
248 }
249 } else if data[left] <= data[right] {
250 left
251 } else if data[mid] <= data[right] {
252 right
253 } else {
254 mid
255 }
256}
257
258fn median_of_three_pivot_by_index<T>(
260 data: &[Partial<T>],
261 indices: &[usize],
262 left: usize,
263 right: usize,
264) -> usize
265where
266 T: PartialOrd,
267{
268 let mid = left + (right - left) / 2;
269
270 if data[indices[left]] <= data[indices[mid]] {
271 if data[indices[mid]] <= data[indices[right]] {
272 mid
273 } else if data[indices[left]] <= data[indices[right]] {
274 right
275 } else {
276 left
277 }
278 } else if data[indices[left]] <= data[indices[right]] {
279 left
280 } else if data[indices[mid]] <= data[indices[right]] {
281 right
282 } else {
283 mid
284 }
285}
286
287fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
289where
290 T: PartialOrd,
291{
292 data.swap(pivot_idx, right);
294 let mut store_idx = left;
295
296 for i in left..right {
299 if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
302 data.swap(i, store_idx);
303 store_idx += 1;
304 }
305 }
306
307 data.swap(store_idx, right);
309 store_idx
310}
311
312fn partition_by_index<T>(
314 data: &[Partial<T>],
315 indices: &mut [usize],
316 left: usize,
317 right: usize,
318 pivot_idx: usize,
319) -> usize
320where
321 T: PartialOrd,
322{
323 indices.swap(pivot_idx, right);
325 let mut store_idx = left;
326
327 let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
331 let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
332
333 let mut elem_idx: usize;
335 for i in left..right {
336 elem_idx = unsafe { *indices.get_unchecked(i) };
338 if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
339 indices.swap(i, store_idx);
340 store_idx += 1;
341 }
342 }
343
344 indices.swap(store_idx, right);
346 store_idx
347}
348
349fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
353where
354 T: PartialOrd + ToPrimitive,
355{
356 let len = data.len();
357
358 match len {
360 0..=2 => return None,
361 3 => {
362 return Some(
363 unsafe {
365 (
366 data.get_unchecked(0).0.to_f64()?,
367 data.get_unchecked(1).0.to_f64()?,
368 data.get_unchecked(2).0.to_f64()?,
369 )
370 },
371 );
372 }
373 _ => {}
374 }
375
376 let k = len / 4;
378 let remainder = len % 4;
379
380 unsafe {
383 Some(match remainder {
384 0 => {
385 let q1 = f64::midpoint(
396 data.get_unchecked(k - 1).0.to_f64()?,
397 data.get_unchecked(k).0.to_f64()?,
398 );
399 let q2 = f64::midpoint(
400 data.get_unchecked(2 * k - 1).0.to_f64()?,
401 data.get_unchecked(2 * k).0.to_f64()?,
402 );
403 let q3 = f64::midpoint(
404 data.get_unchecked(3 * k - 1).0.to_f64()?,
405 data.get_unchecked(3 * k).0.to_f64()?,
406 );
407 (q1, q2, q3)
408 }
409 1 => {
410 let q1 = f64::midpoint(
421 data.get_unchecked(k - 1).0.to_f64()?,
422 data.get_unchecked(k).0.to_f64()?,
423 );
424 let q2 = data.get_unchecked(2 * k).0.to_f64()?;
425 let q3 = f64::midpoint(
426 data.get_unchecked(3 * k).0.to_f64()?,
427 data.get_unchecked(3 * k + 1).0.to_f64()?,
428 );
429 (q1, q2, q3)
430 }
431 2 => {
432 let q1 = data.get_unchecked(k).0.to_f64()?;
443 let q2 = f64::midpoint(
444 data.get_unchecked(2 * k).0.to_f64()?,
445 data.get_unchecked(2 * k + 1).0.to_f64()?,
446 );
447 let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
448 (q1, q2, q3)
449 }
450 _ => {
451 let q1 = data.get_unchecked(k).0.to_f64()?;
462 let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
463 let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
464 (q1, q2, q3)
465 }
466 })
467 }
468}
469
470fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
473where
474 T: PartialOrd + ToPrimitive,
475{
476 let len = data.len();
477
478 match len {
480 0..=2 => return None,
481 3 => {
482 let min_val = quickselect(data, 0)?.to_f64()?;
484 let med_val = quickselect(data, 1)?.to_f64()?;
485 let max_val = quickselect(data, 2)?.to_f64()?;
486 return Some((min_val, med_val, max_val));
487 }
488 _ => {}
489 }
490
491 let k = len / 4;
493 let remainder = len % 4;
494
495 match remainder {
497 0 => {
498 let q1_low = quickselect(data, k - 1)?.to_f64()?;
503 let q1_high = quickselect(data, k)?.to_f64()?;
504 let q1 = f64::midpoint(q1_low, q1_high);
505
506 let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
507 let q2_high = quickselect(data, 2 * k)?.to_f64()?;
508 let q2 = f64::midpoint(q2_low, q2_high);
509
510 let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
511 let q3_high = quickselect(data, 3 * k)?.to_f64()?;
512 let q3 = f64::midpoint(q3_low, q3_high);
513
514 Some((q1, q2, q3))
515 }
516 1 => {
517 let q1_low = quickselect(data, k - 1)?.to_f64()?;
522 let q1_high = quickselect(data, k)?.to_f64()?;
523 let q1 = f64::midpoint(q1_low, q1_high);
524
525 let q2 = quickselect(data, 2 * k)?.to_f64()?;
526
527 let q3_low = quickselect(data, 3 * k)?.to_f64()?;
528 let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
529 let q3 = f64::midpoint(q3_low, q3_high);
530
531 Some((q1, q2, q3))
532 }
533 2 => {
534 let q1 = quickselect(data, k)?.to_f64()?;
539
540 let q2_low = quickselect(data, 2 * k)?.to_f64()?;
541 let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
542 let q2 = f64::midpoint(q2_low, q2_high);
543
544 let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
545
546 Some((q1, q2, q3))
547 }
548 _ => {
549 let q1 = quickselect(data, k)?.to_f64()?;
554 let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
555 let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
556
557 Some((q1, q2, q3))
558 }
559 }
560}
561
562fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
565where
566 T: PartialOrd + ToPrimitive,
567{
568 let len = data.len();
569
570 match len {
572 0..=2 => return None,
573 3 => {
574 let mut indices = vec![0, 1, 2];
576 let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
577 let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
578 let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
579 return Some((min_val, med_val, max_val));
580 }
581 _ => {}
582 }
583
584 let k = len / 4;
586 let remainder = len % 4;
587
588 let indices_template: Vec<usize> = (0..len).collect();
590 #[allow(unused)]
591 let mut indices: Vec<usize> = Vec::with_capacity(len);
592
593 match remainder {
597 0 => {
598 indices.clone_from(&indices_template);
600 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
601 indices.clone_from(&indices_template);
602 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
603 let q1 = f64::midpoint(q1_low, q1_high);
604
605 indices.clone_from(&indices_template);
606 let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
607 indices.clone_from(&indices_template);
608 let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
609 let q2 = f64::midpoint(q2_low, q2_high);
610
611 indices.clone_from(&indices_template);
612 let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
613 indices.clone_from(&indices_template);
614 let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
615 let q3 = f64::midpoint(q3_low, q3_high);
616
617 Some((q1, q2, q3))
618 }
619 1 => {
620 indices.clone_from(&indices_template);
622 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
623 indices.clone_from(&indices_template);
624 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
625 let q1 = f64::midpoint(q1_low, q1_high);
626
627 indices.clone_from(&indices_template);
628 let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
629
630 indices.clone_from(&indices_template);
631 let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
632 indices.clone_from(&indices_template);
633 let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
634 let q3 = f64::midpoint(q3_low, q3_high);
635
636 Some((q1, q2, q3))
637 }
638 2 => {
639 indices.clone_from(&indices_template);
641 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
642
643 indices.clone_from(&indices_template);
644 let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
645 indices.clone_from(&indices_template);
646 let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
647 let q2 = f64::midpoint(q2_low, q2_high);
648
649 indices.clone_from(&indices_template);
650 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
651
652 Some((q1, q2, q3))
653 }
654 _ => {
655 indices.clone_from(&indices_template);
657 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
658 indices.clone_from(&indices_template);
659 let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
660 indices.clone_from(&indices_template);
661 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
662
663 Some((q1, q2, q3))
664 }
665 }
666}
667
668fn mode_on_sorted<T, I>(it: I) -> Option<T>
669where
670 T: PartialOrd,
671 I: Iterator<Item = T>,
672{
673 use std::cmp::Ordering;
674
675 let (mut mode, mut next) = (None, None);
682 let (mut mode_count, mut next_count) = (0usize, 0usize);
683 for x in it {
684 if mode.as_ref() == Some(&x) {
685 mode_count += 1;
686 } else if next.as_ref() == Some(&x) {
687 next_count += 1;
688 } else {
689 next = Some(x);
690 next_count = 0;
691 }
692
693 match next_count.cmp(&mode_count) {
694 Ordering::Greater => {
695 mode = next;
696 mode_count = next_count;
697 next = None;
698 next_count = 0;
699 }
700 Ordering::Equal => {
701 mode = None;
702 mode_count = 0;
703 }
704 Ordering::Less => {}
705 }
706 }
707 mode
708}
709
710#[allow(clippy::type_complexity)]
738#[inline]
739fn modes_and_antimodes_on_sorted_slice<T>(
740 data: &[Partial<T>],
741) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
742where
743 T: PartialOrd + Clone,
744{
745 let size = data.len();
746
747 if size == 0 {
749 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
750 }
751
752 let sqrt_size = if size == 0 {
755 0
756 } else {
757 let mut x = size;
758 let mut y = x.div_ceil(2);
759 while y < x {
760 x = y;
761 y = usize::midpoint(x, size / x);
762 }
763 x
764 };
765 let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
766
767 let mut current_value = &data[0].0;
768 let mut current_count = 1;
769 let mut highest_count = 1;
770 let mut lowest_count = u32::MAX;
771
772 for x in data.iter().skip(1) {
774 if x.0 == *current_value {
775 current_count += 1;
776 highest_count = highest_count.max(current_count);
777 } else {
778 runs.push((current_value, current_count));
779 lowest_count = lowest_count.min(current_count);
780 current_value = &x.0;
781 current_count = 1;
782 }
783 }
784 runs.push((current_value, current_count));
785 lowest_count = lowest_count.min(current_count);
786
787 if runs.len() == 1 {
789 let (val, count) = runs.pop().unwrap();
790 return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
791 }
792
793 if highest_count == 1 {
795 let antimodes_count = runs.len().min(10);
796 let total_count = runs.len();
797 let mut antimodes = Vec::with_capacity(antimodes_count);
798 for (val, _) in runs.into_iter().take(antimodes_count) {
799 antimodes.push(val.clone());
800 }
801 return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
803 }
804
805 let estimated_modes = (runs.len() / 10).clamp(1, 10);
809 let estimated_antimodes = 10.min(runs.len());
810
811 let mut modes_indices = Vec::with_capacity(estimated_modes);
813 let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
814 let mut mode_count = 0;
815 let mut antimodes_count = 0;
816 let mut antimodes_collected = 0_u32;
817
818 for (idx, (_, count)) in runs.iter().enumerate() {
820 if *count == highest_count {
821 modes_indices.push(idx);
822 mode_count += 1;
823 }
824 if *count == lowest_count {
825 antimodes_count += 1;
826 if antimodes_collected < 10 {
827 antimodes_indices.push(idx);
828 antimodes_collected += 1;
829 }
830 }
831 }
832
833 let modes_result: Vec<T> = modes_indices
835 .into_iter()
836 .map(|idx| runs[idx].0.clone())
837 .collect();
838 let antimodes_result: Vec<T> = antimodes_indices
839 .into_iter()
840 .map(|idx| runs[idx].0.clone())
841 .collect();
842
843 (
844 (modes_result, mode_count, highest_count),
845 (antimodes_result, antimodes_count, lowest_count),
846 )
847}
848
849#[allow(clippy::unsafe_derive_deserialize)]
857#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
858pub struct Unsorted<T> {
859 sorted: bool,
860 data: Vec<Partial<T>>,
861}
862
863impl<T: PartialOrd> Unsorted<T> {
864 #[inline]
866 #[must_use]
867 pub fn new() -> Unsorted<T> {
868 Default::default()
869 }
870
871 #[allow(clippy::inline_always)]
873 #[inline(always)]
874 pub fn add(&mut self, v: T) {
875 self.sorted = false;
876 self.data.push(Partial(v));
877 }
878
879 #[inline]
881 #[must_use]
882 pub const fn len(&self) -> usize {
883 self.data.len()
884 }
885
886 #[inline]
887 #[must_use]
888 pub const fn is_empty(&self) -> bool {
889 self.data.is_empty()
890 }
891
892 #[inline]
893 fn sort(&mut self) {
894 if !self.sorted {
895 if self.data.len() < PARALLEL_THRESHOLD {
897 self.data.sort_unstable();
898 } else {
899 self.data.par_sort_unstable();
900 }
901 self.sorted = true;
902 }
903 }
904
905 #[inline]
906 const fn already_sorted(&mut self) {
907 self.sorted = true;
908 }
909
910 #[inline]
912 pub fn add_bulk(&mut self, values: Vec<T>) {
913 self.sorted = false;
914 self.data.reserve(values.len());
915 self.data.extend(values.into_iter().map(Partial));
916 }
917
918 #[inline]
920 pub fn shrink_to_fit(&mut self) {
921 self.data.shrink_to_fit();
922 }
923
924 #[inline]
926 #[must_use]
927 pub fn with_capacity(capacity: usize) -> Self {
928 Unsorted {
929 sorted: true,
930 data: Vec::with_capacity(capacity),
931 }
932 }
933
934 #[inline]
936 pub fn push_ascending(&mut self, value: T) {
937 if let Some(last) = self.data.last() {
938 debug_assert!(last.0 <= value, "Value must be >= than last element");
939 }
940 self.data.push(Partial(value));
941 }
943}
944
945impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
946 #[inline]
947 pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
955 const CHUNK_SIZE: usize = 2048; const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; let len = self.data.len();
959 match len {
960 0 => return 0,
961 1 => return 1,
962 _ => {}
963 }
964
965 if sorted {
966 self.already_sorted();
967 } else {
968 self.sort();
969 }
970
971 let use_parallel = parallel_threshold != 0
972 && (parallel_threshold == 1
973 || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
974
975 if use_parallel {
976 type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
979 let chunk_info: ChunkInfo<'_, T> = self
980 .data
981 .par_chunks(CHUNK_SIZE)
982 .map(|chunk| {
983 let mut count = u64::from(!chunk.is_empty());
985 for window in chunk.windows(2) {
986 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
988 count += 1;
989 }
990 }
991 (count, chunk.first(), chunk.last())
992 })
993 .collect();
994
995 let mut total = 0;
997 for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
998 total += count;
999
1000 if i > 0
1002 && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1003 && prev_last == curr_first
1004 {
1005 total -= 1; }
1007 }
1008
1009 total
1010 } else {
1011 let mut count = u64::from(!self.data.is_empty());
1016
1017 for window in self.data.windows(2) {
1018 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1020 count += 1;
1021 }
1022 }
1023 count
1024 }
1025 }
1026}
1027
1028impl<T: PartialOrd + Clone> Unsorted<T> {
1029 #[inline]
1031 pub fn mode(&mut self) -> Option<T> {
1032 if self.data.is_empty() {
1033 return None;
1034 }
1035 self.sort();
1036 mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1037 }
1038
1039 #[inline]
1043 fn modes(&mut self) -> (Vec<T>, usize, u32) {
1044 if self.data.is_empty() {
1045 return (Vec::new(), 0, 0);
1046 }
1047 self.sort();
1048 modes_and_antimodes_on_sorted_slice(&self.data).0
1049 }
1050
1051 #[inline]
1054 fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1055 if self.data.is_empty() {
1056 return (Vec::new(), 0, 0);
1057 }
1058 self.sort();
1059 modes_and_antimodes_on_sorted_slice(&self.data).1
1060 }
1061
1062 #[allow(clippy::type_complexity)]
1065 #[inline]
1066 pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1067 if self.data.is_empty() {
1068 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1069 }
1070 self.sort();
1071 modes_and_antimodes_on_sorted_slice(&self.data)
1072 }
1073}
1074
1075impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1076 #[inline]
1078 pub fn median(&mut self) -> Option<f64> {
1079 if self.data.is_empty() {
1080 return None;
1081 }
1082 self.sort();
1083 median_on_sorted(&self.data)
1084 }
1085}
1086
1087impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1088 #[inline]
1090 pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1091 if self.data.is_empty() {
1092 return None;
1093 }
1094 if existing_median.is_none() {
1095 self.sort();
1096 }
1097 mad_on_sorted(&self.data, existing_median)
1098 }
1099}
1100
1101impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1102 #[inline]
1107 pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1108 if self.data.is_empty() {
1109 return None;
1110 }
1111 self.sort();
1112 quartiles_on_sorted(&self.data)
1113 }
1114}
1115
1116impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1117 #[inline]
1129 pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1130 if self.data.is_empty() {
1131 return None;
1132 }
1133 if self.sorted {
1135 return quartiles_with_zero_copy_selection(&self.data);
1136 }
1137 let mut data_copy: Vec<Partial<T>> =
1139 self.data.iter().map(|x| Partial(x.0.clone())).collect();
1140 quartiles_with_selection(&mut data_copy)
1141 }
1142}
1143
1144impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1145 #[inline]
1151 #[must_use]
1152 pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1153 if self.data.is_empty() {
1154 return None;
1155 }
1156 quartiles_with_zero_copy_selection(&self.data)
1157 }
1158}
1159
1160impl<T: PartialOrd> Commute for Unsorted<T> {
1161 #[inline]
1162 fn merge(&mut self, mut v: Unsorted<T>) {
1163 if v.is_empty() {
1164 return;
1165 }
1166
1167 self.sorted = false;
1168 self.data.extend(std::mem::take(&mut v.data));
1170 }
1171}
1172
1173impl<T: PartialOrd> Default for Unsorted<T> {
1174 #[inline]
1175 fn default() -> Unsorted<T> {
1176 Unsorted {
1177 data: Vec::with_capacity(10_000),
1178 sorted: true, }
1180 }
1181}
1182
1183impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1184 #[inline]
1185 fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1186 let mut v = Unsorted::new();
1187 v.extend(it);
1188 v
1189 }
1190}
1191
1192impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1193 #[inline]
1194 fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1195 self.sorted = false;
1196 self.data.extend(it.into_iter().map(Partial));
1197 }
1198}
1199
1200fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1201where
1202 T: PartialOrd + Clone,
1203{
1204 let len = data.len();
1205
1206 if len == 0 || percentiles.iter().any(|&p| p > 100) {
1208 return None;
1209 }
1210
1211 let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1213 percentiles.to_vec()
1215 } else {
1216 let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1218
1219 if is_sorted_unique {
1220 percentiles.to_vec()
1222 } else {
1223 use std::collections::HashSet;
1225 let mut seen = HashSet::with_capacity(percentiles.len().min(100));
1226 let mut sorted_unique = Vec::with_capacity(percentiles.len());
1227 for &p in percentiles {
1228 if seen.insert(p) {
1229 sorted_unique.push(p);
1230 }
1231 }
1232 sorted_unique.sort_unstable();
1233 sorted_unique
1234 }
1235 };
1236
1237 let mut results = Vec::with_capacity(unique_percentiles.len());
1238
1239 unsafe {
1243 for &p in &unique_percentiles {
1244 #[allow(clippy::cast_sign_loss)]
1248 let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1249
1250 let idx = rank.saturating_sub(1);
1252
1253 results.push(data.get_unchecked(idx).0.clone());
1255 }
1256 }
1257
1258 Some(results)
1259}
1260
1261impl<T: PartialOrd + Clone> Unsorted<T> {
1262 #[inline]
1284 pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1285 if self.data.is_empty() {
1286 return None;
1287 }
1288 self.sort();
1289 custom_percentiles_on_sorted(&self.data, percentiles)
1290 }
1291}
1292
1293#[cfg(test)]
1294mod test {
1295 use super::*;
1296
1297 #[test]
1298 fn test_cardinality_empty() {
1299 let mut unsorted: Unsorted<i32> = Unsorted::new();
1300 assert_eq!(unsorted.cardinality(false, 1), 0);
1301 }
1302
1303 #[test]
1304 fn test_cardinality_single_element() {
1305 let mut unsorted = Unsorted::new();
1306 unsorted.add(5);
1307 assert_eq!(unsorted.cardinality(false, 1), 1);
1308 }
1309
1310 #[test]
1311 fn test_cardinality_unique_elements() {
1312 let mut unsorted = Unsorted::new();
1313 unsorted.extend(vec![1, 2, 3, 4, 5]);
1314 assert_eq!(unsorted.cardinality(false, 1), 5);
1315 }
1316
1317 #[test]
1318 fn test_cardinality_duplicate_elements() {
1319 let mut unsorted = Unsorted::new();
1320 unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1321 assert_eq!(unsorted.cardinality(false, 1), 4);
1322 }
1323
1324 #[test]
1325 fn test_cardinality_all_same() {
1326 let mut unsorted = Unsorted::new();
1327 unsorted.extend(vec![1; 100]);
1328 assert_eq!(unsorted.cardinality(false, 1), 1);
1329 }
1330
1331 #[test]
1332 fn test_cardinality_large_range() {
1333 let mut unsorted = Unsorted::new();
1334 unsorted.extend(0..1_000_000);
1335 assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1336 }
1337
1338 #[test]
1339 fn test_cardinality_large_range_sequential() {
1340 let mut unsorted = Unsorted::new();
1341 unsorted.extend(0..1_000_000);
1342 assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1343 }
1344
1345 #[test]
1346 fn test_cardinality_presorted() {
1347 let mut unsorted = Unsorted::new();
1348 unsorted.extend(vec![1, 2, 3, 4, 5]);
1349 unsorted.sort();
1350 assert_eq!(unsorted.cardinality(true, 1), 5);
1351 }
1352
1353 #[test]
1354 fn test_cardinality_float() {
1355 let mut unsorted = Unsorted::new();
1356 unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1357 assert_eq!(unsorted.cardinality(false, 1), 4);
1358 }
1359
1360 #[test]
1361 fn test_cardinality_string() {
1362 let mut unsorted = Unsorted::new();
1363 unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1364 assert_eq!(unsorted.cardinality(false, 1), 3);
1365 }
1366
1367 #[test]
1368 fn test_quartiles_selection_vs_sorted() {
1369 let test_cases = vec![
1371 vec![3, 5, 7, 9],
1372 vec![3, 5, 7],
1373 vec![1, 2, 7, 11],
1374 vec![3, 5, 7, 9, 12],
1375 vec![2, 2, 3, 8, 10],
1376 vec![3, 5, 7, 9, 12, 20],
1377 vec![0, 2, 4, 8, 10, 11],
1378 vec![3, 5, 7, 9, 12, 20, 21],
1379 vec![1, 5, 6, 6, 7, 10, 19],
1380 ];
1381
1382 for test_case in test_cases {
1383 let mut unsorted1 = Unsorted::new();
1384 let mut unsorted2 = Unsorted::new();
1385 let mut unsorted3 = Unsorted::new();
1386 unsorted1.extend(test_case.clone());
1387 unsorted2.extend(test_case.clone());
1388 unsorted3.extend(test_case.clone());
1389
1390 let result_sorted = unsorted1.quartiles();
1391 let result_selection = unsorted2.quartiles_with_selection();
1392 let result_zero_copy = unsorted3.quartiles_zero_copy();
1393
1394 assert_eq!(
1395 result_sorted, result_selection,
1396 "Selection mismatch for test case: {:?}",
1397 test_case
1398 );
1399 assert_eq!(
1400 result_sorted, result_zero_copy,
1401 "Zero-copy mismatch for test case: {:?}",
1402 test_case
1403 );
1404 }
1405 }
1406
1407 #[test]
1408 fn test_quartiles_with_selection_small() {
1409 let mut unsorted: Unsorted<i32> = Unsorted::new();
1411 assert_eq!(unsorted.quartiles_with_selection(), None);
1412
1413 let mut unsorted = Unsorted::new();
1414 unsorted.extend(vec![1, 2]);
1415 assert_eq!(unsorted.quartiles_with_selection(), None);
1416
1417 let mut unsorted = Unsorted::new();
1418 unsorted.extend(vec![1, 2, 3]);
1419 assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1420 }
1421
1422 #[test]
1423 fn test_quickselect() {
1424 let data = vec![
1425 Partial(3),
1426 Partial(1),
1427 Partial(4),
1428 Partial(1),
1429 Partial(5),
1430 Partial(9),
1431 Partial(2),
1432 Partial(6),
1433 ];
1434
1435 assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1437 assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1438 assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1439
1440 let mut empty: Vec<Partial<i32>> = vec![];
1442 assert_eq!(quickselect(&mut empty, 0), None);
1443
1444 let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1445 assert_eq!(quickselect(&mut data, 10), None); }
1447
1448 #[test]
1449 fn median_stream() {
1450 assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1451 assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1452 }
1453
1454 #[test]
1455 fn mad_stream() {
1456 assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1457 assert_eq!(
1458 mad(
1459 vec![
1460 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1461 50, 61, 39, 88, 5, 13, 64
1462 ]
1463 .into_iter(),
1464 None
1465 ),
1466 Some(16.0)
1467 );
1468 }
1469
1470 #[test]
1471 fn mad_stream_precalc_median() {
1472 let data = vec![3usize, 5, 7, 9].into_iter();
1473 let median1 = median(data.clone());
1474 assert_eq!(mad(data, median1), Some(2.0));
1475
1476 let data2 = vec![
1477 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1478 39, 88, 5, 13, 64,
1479 ]
1480 .into_iter();
1481 let median2 = median(data2.clone());
1482 assert_eq!(mad(data2, median2), Some(16.0));
1483 }
1484
1485 #[test]
1486 fn mode_stream() {
1487 assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1488 assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1489 assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1490 assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1491 assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1492 }
1493
1494 #[test]
1495 fn median_floats() {
1496 assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1497 assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1498 }
1499
1500 #[test]
1501 fn mode_floats() {
1502 assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1503 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1504 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1505 assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1506 assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1507 }
1508
1509 #[test]
1510 fn modes_stream() {
1511 assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1512 assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1513 assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1514 assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1515 assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1516 let vec: Vec<u32> = vec![];
1517 assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1518 }
1519
1520 #[test]
1521 fn modes_floats() {
1522 assert_eq!(
1523 modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1524 (vec![], 0, 0)
1525 );
1526 assert_eq!(
1527 modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1528 (vec![3.0], 1, 4)
1529 );
1530 assert_eq!(
1531 modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1532 (vec![3.0, 4.0], 2, 2)
1533 );
1534 assert_eq!(
1535 modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1536 (vec![1.0, 3.0], 2, 2)
1537 );
1538 }
1539
1540 #[test]
1541 fn antimodes_stream() {
1542 assert_eq!(
1543 antimodes(vec![3usize, 5, 7, 9].into_iter()),
1544 (vec![3, 5, 7, 9], 4, 1)
1545 );
1546 assert_eq!(
1547 antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1548 (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1549 );
1550 assert_eq!(
1551 antimodes(vec![1usize, 3, 3, 3].into_iter()),
1552 (vec![1], 1, 1)
1553 );
1554 assert_eq!(
1555 antimodes(vec![3usize, 3, 4, 4].into_iter()),
1556 (vec![3, 4], 2, 2)
1557 );
1558 assert_eq!(
1559 antimodes(
1560 vec![
1561 3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1562 14, 14, 15, 15
1563 ]
1564 .into_iter()
1565 ),
1566 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1568 );
1569 assert_eq!(
1570 antimodes(
1571 vec![
1572 3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1573 14, 14, 15, 15
1574 ]
1575 .into_iter()
1576 ),
1577 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1578 );
1579 assert_eq!(
1580 antimodes(vec![3usize, 3, 3, 4].into_iter()),
1581 (vec![4], 1, 1)
1582 );
1583 assert_eq!(
1584 antimodes(vec![4usize, 3, 3, 3].into_iter()),
1585 (vec![4], 1, 1)
1586 );
1587 assert_eq!(
1588 antimodes(vec![1usize, 1, 2, 2].into_iter()),
1589 (vec![1, 2], 2, 2)
1590 );
1591 let vec: Vec<u32> = vec![];
1592 assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1593 }
1594
1595 #[test]
1596 fn antimodes_floats() {
1597 assert_eq!(
1598 antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1599 (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1600 );
1601 assert_eq!(
1602 antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1603 (vec![], 0, 0)
1604 );
1605 assert_eq!(
1606 antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1607 (vec![3.0, 4.0], 2, 2)
1608 );
1609 assert_eq!(
1610 antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1611 (vec![2.0], 1, 1)
1612 );
1613 }
1614
1615 #[test]
1616 fn test_custom_percentiles() {
1617 let mut unsorted: Unsorted<i32> = Unsorted::new();
1619 unsorted.extend(1..=11); let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
1622 assert_eq!(result, vec![3, 6, 9]);
1623
1624 let mut str_data = Unsorted::new();
1626 str_data.extend(vec!["a", "b", "c", "d", "e"]);
1627 let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
1628 assert_eq!(result, vec!["a", "b", "c", "d"]);
1629
1630 let mut char_data = Unsorted::new();
1632 char_data.extend('a'..='e');
1633 let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
1634 assert_eq!(result, vec!['b', 'c', 'd']);
1635
1636 let mut float_data = Unsorted::new();
1638 float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
1639 let result = float_data
1640 .custom_percentiles(&[10, 30, 50, 70, 90])
1641 .unwrap();
1642 assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
1643
1644 let result = float_data.custom_percentiles(&[]).unwrap();
1646 assert_eq!(result, Vec::<f64>::new());
1647
1648 let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
1650 assert_eq!(result, vec![5.5]);
1651
1652 let result = float_data.custom_percentiles(&[0, 100]).unwrap();
1654 assert_eq!(result, vec![1.1, 9.9]);
1655
1656 let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
1658 assert_eq!(result, vec![3.3, 5.5, 7.7]); let mut single = Unsorted::new();
1662 single.add(42);
1663 let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
1664 assert_eq!(result, vec![42, 42, 42]);
1665 }
1666
1667 #[test]
1668 fn quartiles_stream() {
1669 assert_eq!(
1670 quartiles(vec![3usize, 5, 7].into_iter()),
1671 Some((3., 5., 7.))
1672 );
1673 assert_eq!(
1674 quartiles(vec![3usize, 5, 7, 9].into_iter()),
1675 Some((4., 6., 8.))
1676 );
1677 assert_eq!(
1678 quartiles(vec![1usize, 2, 7, 11].into_iter()),
1679 Some((1.5, 4.5, 9.))
1680 );
1681 assert_eq!(
1682 quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
1683 Some((4., 7., 10.5))
1684 );
1685 assert_eq!(
1686 quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
1687 Some((2., 3., 9.))
1688 );
1689 assert_eq!(
1690 quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
1691 Some((5., 8., 12.))
1692 );
1693 assert_eq!(
1694 quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
1695 Some((2., 6., 10.))
1696 );
1697 assert_eq!(
1698 quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
1699 Some((5., 9., 20.))
1700 );
1701 assert_eq!(
1702 quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
1703 Some((5., 6., 10.))
1704 );
1705 }
1706
1707 #[test]
1708 fn quartiles_floats() {
1709 assert_eq!(
1710 quartiles(vec![3_f64, 5., 7.].into_iter()),
1711 Some((3., 5., 7.))
1712 );
1713 assert_eq!(
1714 quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
1715 Some((4., 6., 8.))
1716 );
1717 assert_eq!(
1718 quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
1719 Some((4., 7., 10.5))
1720 );
1721 assert_eq!(
1722 quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
1723 Some((5., 8., 12.))
1724 );
1725 assert_eq!(
1726 quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
1727 Some((5., 9., 20.))
1728 );
1729 }
1730
1731 #[test]
1732 fn test_quartiles_zero_copy_small() {
1733 let unsorted: Unsorted<i32> = Unsorted::new();
1735 assert_eq!(unsorted.quartiles_zero_copy(), None);
1736
1737 let mut unsorted = Unsorted::new();
1738 unsorted.extend(vec![1, 2]);
1739 assert_eq!(unsorted.quartiles_zero_copy(), None);
1740
1741 let mut unsorted = Unsorted::new();
1742 unsorted.extend(vec![1, 2, 3]);
1743 assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
1744
1745 let mut unsorted = Unsorted::new();
1747 unsorted.extend(vec![3, 5, 7, 9]);
1748 assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
1749 }
1750}
1751
1752#[cfg(test)]
1753mod bench {
1754 use super::*;
1755 use std::time::Instant;
1756
1757 #[test]
1758 #[ignore] fn comprehensive_quartiles_benchmark() {
1760 let data_sizes = vec![
1762 1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
1763 ];
1764
1765 println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
1766
1767 for size in data_sizes {
1768 println!("--- Testing with {} elements ---", size);
1769
1770 let test_patterns = vec![
1772 ("Random", generate_random_data(size)),
1773 ("Reverse Sorted", {
1774 let mut v = Vec::with_capacity(size);
1775 for x in (0..size).rev() {
1776 v.push(x as i32);
1777 }
1778 v
1779 }),
1780 ("Already Sorted", {
1781 let mut v = Vec::with_capacity(size);
1782 for x in 0..size {
1783 v.push(x as i32);
1784 }
1785 v
1786 }),
1787 ("Many Duplicates", {
1788 let mut v = Vec::with_capacity(size);
1790 let chunk_size = size / 100;
1791 for i in 0..100 {
1792 v.extend(std::iter::repeat_n(i, chunk_size));
1793 }
1794 v.extend(std::iter::repeat_n(0, size - v.len()));
1796 v
1797 }),
1798 ];
1799
1800 for (pattern_name, test_data) in test_patterns {
1801 println!("\n Pattern: {}", pattern_name);
1802
1803 let mut unsorted1 = Unsorted::new();
1805 unsorted1.extend(test_data.clone());
1806
1807 let start = Instant::now();
1808 let result_sorted = unsorted1.quartiles();
1809 let sorted_time = start.elapsed();
1810
1811 let mut unsorted2 = Unsorted::new();
1813 unsorted2.extend(test_data.clone());
1814
1815 let start = Instant::now();
1816 let result_selection = unsorted2.quartiles_with_selection();
1817 let selection_time = start.elapsed();
1818
1819 let mut unsorted3 = Unsorted::new();
1821 unsorted3.extend(test_data);
1822
1823 let start = Instant::now();
1824 let result_zero_copy = unsorted3.quartiles_zero_copy();
1825 let zero_copy_time = start.elapsed();
1826
1827 assert_eq!(result_sorted, result_selection);
1829 assert_eq!(result_sorted, result_zero_copy);
1830
1831 let selection_speedup =
1832 sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
1833 let zero_copy_speedup =
1834 sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
1835
1836 println!(" Sorting: {:>12?}", sorted_time);
1837 println!(
1838 " Selection: {:>12?} (speedup: {:.2}x)",
1839 selection_time, selection_speedup
1840 );
1841 println!(
1842 " Zero-copy: {:>12?} (speedup: {:.2}x)",
1843 zero_copy_time, zero_copy_speedup
1844 );
1845
1846 let best_algorithm =
1847 if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
1848 "ZERO-COPY"
1849 } else if selection_speedup > 1.0 {
1850 "SELECTION"
1851 } else {
1852 "SORTING"
1853 };
1854 println!(" Best: {}", best_algorithm);
1855 }
1856
1857 println!(); }
1859 }
1860
1861 fn generate_random_data(size: usize) -> Vec<i32> {
1863 let mut rng = 1234567u64;
1865 let mut vec = Vec::with_capacity(size);
1866 for _ in 0..size {
1867 rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
1868 vec.push((rng >> 16) as i32);
1869 }
1870 vec
1871 }
1872
1873 #[test]
1874 #[ignore] fn find_selection_threshold() {
1876 println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
1877
1878 let mut found_threshold = None;
1880 let test_sizes = vec![
1881 1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
1882 15_000_000, 20_000_000, 25_000_000, 30_000_000,
1883 ];
1884
1885 for size in test_sizes {
1886 println!("Testing size: {}", size);
1887
1888 let test_data = generate_random_data(size);
1890
1891 let iterations = 3;
1893 let mut sorting_total = 0u128;
1894 let mut selection_total = 0u128;
1895 let mut zero_copy_total = 0u128;
1896
1897 for i in 0..iterations {
1898 println!(" Iteration {}/{}", i + 1, iterations);
1899
1900 let mut unsorted1 = Unsorted::new();
1902 unsorted1.extend(test_data.clone());
1903
1904 let start = Instant::now();
1905 let _result_sorted = unsorted1.quartiles();
1906 sorting_total += start.elapsed().as_nanos();
1907
1908 let mut unsorted2 = Unsorted::new();
1910 unsorted2.extend(test_data.clone());
1911
1912 let start = Instant::now();
1913 let _result_selection = unsorted2.quartiles_with_selection();
1914 selection_total += start.elapsed().as_nanos();
1915
1916 let mut unsorted3 = Unsorted::new();
1918 unsorted3.extend(test_data.clone());
1919
1920 let start = Instant::now();
1921 let _result_zero_copy = unsorted3.quartiles_zero_copy();
1922 zero_copy_total += start.elapsed().as_nanos();
1923 }
1924
1925 let avg_sorting = sorting_total / iterations as u128;
1926 let avg_selection = selection_total / iterations as u128;
1927 let avg_zero_copy = zero_copy_total / iterations as u128;
1928 let selection_speedup = avg_sorting as f64 / avg_selection as f64;
1929 let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
1930
1931 println!(
1932 " Average sorting: {:>12.2}ms",
1933 avg_sorting as f64 / 1_000_000.0
1934 );
1935 println!(
1936 " Average selection: {:>12.2}ms (speedup: {:.2}x)",
1937 avg_selection as f64 / 1_000_000.0,
1938 selection_speedup
1939 );
1940 println!(
1941 " Average zero-copy: {:>12.2}ms (speedup: {:.2}x)",
1942 avg_zero_copy as f64 / 1_000_000.0,
1943 zero_copy_speedup
1944 );
1945
1946 if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
1947 found_threshold = Some(size);
1948 let best_method = if zero_copy_speedup > selection_speedup {
1949 "Zero-copy"
1950 } else {
1951 "Selection"
1952 };
1953 println!(
1954 " *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
1955 best_method, size
1956 );
1957 }
1958
1959 println!();
1960 }
1961
1962 match found_threshold {
1963 Some(threshold) => println!(
1964 "🎯 Selection algorithm becomes faster at approximately {} elements",
1965 threshold
1966 ),
1967 None => println!("❌ Selection algorithm did not become faster in the tested range"),
1968 }
1969 }
1970
1971 #[test]
1972 #[ignore] fn benchmark_different_data_types() {
1974 println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
1975
1976 let size = 5_000_000; println!("Testing with f64 data:");
1980 let float_data: Vec<f64> = generate_random_data(size)
1981 .into_iter()
1982 .map(|x| x as f64 / 1000.0)
1983 .collect();
1984
1985 let mut unsorted1 = Unsorted::new();
1986 unsorted1.extend(float_data.clone());
1987 let start = Instant::now();
1988 let _result = unsorted1.quartiles();
1989 let sorting_time = start.elapsed();
1990
1991 let mut unsorted2 = Unsorted::new();
1992 unsorted2.extend(float_data.clone());
1993 let start = Instant::now();
1994 let _result = unsorted2.quartiles_with_selection();
1995 let selection_time = start.elapsed();
1996
1997 let mut unsorted3 = Unsorted::new();
1998 unsorted3.extend(float_data);
1999 let start = Instant::now();
2000 let _result = unsorted3.quartiles_zero_copy();
2001 let zero_copy_time = start.elapsed();
2002
2003 println!(" Sorting: {:?}", sorting_time);
2004 println!(" Selection: {:?}", selection_time);
2005 println!(" Zero-copy: {:?}", zero_copy_time);
2006 println!(
2007 " Selection Speedup: {:.2}x",
2008 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2009 );
2010 println!(
2011 " Zero-copy Speedup: {:.2}x\n",
2012 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2013 );
2014
2015 println!("Testing with i64 data:");
2017 let int64_data: Vec<i64> = generate_random_data(size)
2018 .into_iter()
2019 .map(|x| x as i64 * 1000)
2020 .collect();
2021
2022 let mut unsorted1 = Unsorted::new();
2023 unsorted1.extend(int64_data.clone());
2024 let start = Instant::now();
2025 let _result = unsorted1.quartiles();
2026 let sorting_time = start.elapsed();
2027
2028 let mut unsorted2 = Unsorted::new();
2029 unsorted2.extend(int64_data.clone());
2030 let start = Instant::now();
2031 let _result = unsorted2.quartiles_with_selection();
2032 let selection_time = start.elapsed();
2033
2034 let mut unsorted3 = Unsorted::new();
2035 unsorted3.extend(int64_data);
2036 let start = Instant::now();
2037 let _result = unsorted3.quartiles_zero_copy();
2038 let zero_copy_time = start.elapsed();
2039
2040 println!(" Sorting: {:?}", sorting_time);
2041 println!(" Selection: {:?}", selection_time);
2042 println!(" Zero-copy: {:?}", zero_copy_time);
2043 println!(
2044 " Selection Speedup: {:.2}x",
2045 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2046 );
2047 println!(
2048 " Zero-copy Speedup: {:.2}x",
2049 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2050 );
2051 }
2052}