1use num_traits::ToPrimitive;
2use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10pub fn median<I>(it: I) -> Option<f64>
14where
15 I: Iterator,
16 <I as Iterator>::Item: PartialOrd + ToPrimitive,
17{
18 it.collect::<Unsorted<_>>().median()
19}
20
21pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
23where
24 I: Iterator,
25 <I as Iterator>::Item: PartialOrd + ToPrimitive,
26{
27 it.collect::<Unsorted<_>>().mad(precalc_median)
28}
29
30pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
34where
35 I: Iterator,
36 <I as Iterator>::Item: PartialOrd + ToPrimitive,
37{
38 it.collect::<Unsorted<_>>().quartiles()
39}
40
41pub fn mode<T, I>(it: I) -> Option<T>
47where
48 T: PartialOrd + Clone,
49 I: Iterator<Item = T>,
50{
51 it.collect::<Unsorted<T>>().mode()
52}
53
54pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
72where
73 T: PartialOrd + Clone,
74 I: Iterator<Item = T>,
75{
76 it.collect::<Unsorted<T>>().modes()
77}
78
79pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
102where
103 T: PartialOrd + Clone,
104 I: Iterator<Item = T>,
105{
106 let (antimodes_result, antimodes_count, antimodes_occurrences) =
107 it.collect::<Unsorted<T>>().antimodes();
108 (antimodes_result, antimodes_count, antimodes_occurrences)
109}
110
111fn median_on_sorted<T>(data: &[T]) -> Option<f64>
112where
113 T: PartialOrd + ToPrimitive,
114{
115 Some(match data.len() {
116 0 => return None,
118 1 => data.first()?.to_f64()?,
120 len if len.is_multiple_of(2) => {
122 let idx = len / 2;
123 let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
126 let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
127 f64::midpoint(v1, v2)
128 }
129 len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
132 })
133}
134
135fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
136where
137 T: Sync + PartialOrd + ToPrimitive,
138{
139 if data.is_empty() {
140 return None;
141 }
142 let median_obs =
143 precalc_median.map_or_else(|| median_on_sorted(data).unwrap(), |precalc| precalc);
144
145 let mut abs_diff_vec: Vec<f64> = data
146 .par_iter()
147 .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
148 .collect();
149
150 abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
151 median_on_sorted(&abs_diff_vec)
152}
153
154fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
157where
158 T: PartialOrd,
159{
160 if data.is_empty() || k >= data.len() {
161 return None;
162 }
163
164 let mut left = 0;
165 let mut right = data.len() - 1;
166
167 loop {
168 if left == right {
169 return Some(&data[left].0);
170 }
171
172 let pivot_idx = median_of_three_pivot(data, left, right);
174 let pivot_idx = partition(data, left, right, pivot_idx);
175
176 match k.cmp(&pivot_idx) {
177 std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
178 std::cmp::Ordering::Less => right = pivot_idx - 1,
179 std::cmp::Ordering::Greater => left = pivot_idx + 1,
180 }
181 }
182}
183
184fn quickselect_by_index<'a, T>(
187 data: &'a [Partial<T>],
188 indices: &mut [usize],
189 k: usize,
190) -> Option<&'a T>
191where
192 T: PartialOrd,
193{
194 if data.is_empty() || indices.is_empty() || k >= indices.len() {
195 return None;
196 }
197
198 let mut left = 0;
199 let mut right = indices.len() - 1;
200
201 loop {
202 if left == right {
203 return Some(&data[indices[left]].0);
204 }
205
206 let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
208 let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
209
210 match k.cmp(&pivot_idx) {
211 std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
212 std::cmp::Ordering::Less => right = pivot_idx - 1,
213 std::cmp::Ordering::Greater => left = pivot_idx + 1,
214 }
215 }
216}
217
218fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
220where
221 T: PartialOrd,
222{
223 let mid = left + (right - left) / 2;
224
225 if data[left] <= data[mid] {
226 if data[mid] <= data[right] {
227 mid
228 } else if data[left] <= data[right] {
229 right
230 } else {
231 left
232 }
233 } else if data[left] <= data[right] {
234 left
235 } else if data[mid] <= data[right] {
236 right
237 } else {
238 mid
239 }
240}
241
242fn median_of_three_pivot_by_index<T>(
244 data: &[Partial<T>],
245 indices: &[usize],
246 left: usize,
247 right: usize,
248) -> usize
249where
250 T: PartialOrd,
251{
252 let mid = left + (right - left) / 2;
253
254 if data[indices[left]] <= data[indices[mid]] {
255 if data[indices[mid]] <= data[indices[right]] {
256 mid
257 } else if data[indices[left]] <= data[indices[right]] {
258 right
259 } else {
260 left
261 }
262 } else if data[indices[left]] <= data[indices[right]] {
263 left
264 } else if data[indices[mid]] <= data[indices[right]] {
265 right
266 } else {
267 mid
268 }
269}
270
271fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
273where
274 T: PartialOrd,
275{
276 data.swap(pivot_idx, right);
278 let mut store_idx = left;
279
280 for i in left..right {
282 if data[i] <= data[right] {
283 data.swap(i, store_idx);
284 store_idx += 1;
285 }
286 }
287
288 data.swap(store_idx, right);
290 store_idx
291}
292
293fn partition_by_index<T>(
295 data: &[Partial<T>],
296 indices: &mut [usize],
297 left: usize,
298 right: usize,
299 pivot_idx: usize,
300) -> usize
301where
302 T: PartialOrd,
303{
304 indices.swap(pivot_idx, right);
306 let mut store_idx = left;
307
308 for i in left..right {
310 if data[indices[i]] <= data[indices[right]] {
311 indices.swap(i, store_idx);
312 store_idx += 1;
313 }
314 }
315
316 indices.swap(store_idx, right);
318 store_idx
319}
320
321fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
325where
326 T: PartialOrd + ToPrimitive,
327{
328 let len = data.len();
329
330 match len {
332 0..=2 => return None,
333 3 => {
334 return Some(
335 unsafe {
337 (
338 data.get_unchecked(0).0.to_f64()?,
339 data.get_unchecked(1).0.to_f64()?,
340 data.get_unchecked(2).0.to_f64()?,
341 )
342 },
343 );
344 }
345 _ => {}
346 }
347
348 let k = len / 4;
350 let remainder = len % 4;
351
352 unsafe {
355 Some(match remainder {
356 0 => {
357 let q1 = f64::midpoint(
368 data.get_unchecked(k - 1).0.to_f64()?,
369 data.get_unchecked(k).0.to_f64()?,
370 );
371 let q2 = f64::midpoint(
372 data.get_unchecked(2 * k - 1).0.to_f64()?,
373 data.get_unchecked(2 * k).0.to_f64()?,
374 );
375 let q3 = f64::midpoint(
376 data.get_unchecked(3 * k - 1).0.to_f64()?,
377 data.get_unchecked(3 * k).0.to_f64()?,
378 );
379 (q1, q2, q3)
380 }
381 1 => {
382 let q1 = f64::midpoint(
393 data.get_unchecked(k - 1).0.to_f64()?,
394 data.get_unchecked(k).0.to_f64()?,
395 );
396 let q2 = data.get_unchecked(2 * k).0.to_f64()?;
397 let q3 = f64::midpoint(
398 data.get_unchecked(3 * k).0.to_f64()?,
399 data.get_unchecked(3 * k + 1).0.to_f64()?,
400 );
401 (q1, q2, q3)
402 }
403 2 => {
404 let q1 = data.get_unchecked(k).0.to_f64()?;
415 let q2 = f64::midpoint(
416 data.get_unchecked(2 * k).0.to_f64()?,
417 data.get_unchecked(2 * k + 1).0.to_f64()?,
418 );
419 let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
420 (q1, q2, q3)
421 }
422 _ => {
423 let q1 = data.get_unchecked(k).0.to_f64()?;
434 let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
435 let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
436 (q1, q2, q3)
437 }
438 })
439 }
440}
441
442fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
445where
446 T: PartialOrd + ToPrimitive,
447{
448 let len = data.len();
449
450 match len {
452 0..=2 => return None,
453 3 => {
454 let min_val = quickselect(data, 0)?.to_f64()?;
456 let med_val = quickselect(data, 1)?.to_f64()?;
457 let max_val = quickselect(data, 2)?.to_f64()?;
458 return Some((min_val, med_val, max_val));
459 }
460 _ => {}
461 }
462
463 let k = len / 4;
465 let remainder = len % 4;
466
467 match remainder {
469 0 => {
470 let q1_low = quickselect(data, k - 1)?.to_f64()?;
475 let q1_high = quickselect(data, k)?.to_f64()?;
476 let q1 = f64::midpoint(q1_low, q1_high);
477
478 let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
479 let q2_high = quickselect(data, 2 * k)?.to_f64()?;
480 let q2 = f64::midpoint(q2_low, q2_high);
481
482 let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
483 let q3_high = quickselect(data, 3 * k)?.to_f64()?;
484 let q3 = f64::midpoint(q3_low, q3_high);
485
486 Some((q1, q2, q3))
487 }
488 1 => {
489 let q1_low = quickselect(data, k - 1)?.to_f64()?;
494 let q1_high = quickselect(data, k)?.to_f64()?;
495 let q1 = f64::midpoint(q1_low, q1_high);
496
497 let q2 = quickselect(data, 2 * k)?.to_f64()?;
498
499 let q3_low = quickselect(data, 3 * k)?.to_f64()?;
500 let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
501 let q3 = f64::midpoint(q3_low, q3_high);
502
503 Some((q1, q2, q3))
504 }
505 2 => {
506 let q1 = quickselect(data, k)?.to_f64()?;
511
512 let q2_low = quickselect(data, 2 * k)?.to_f64()?;
513 let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
514 let q2 = f64::midpoint(q2_low, q2_high);
515
516 let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
517
518 Some((q1, q2, q3))
519 }
520 _ => {
521 let q1 = quickselect(data, k)?.to_f64()?;
526 let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
527 let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
528
529 Some((q1, q2, q3))
530 }
531 }
532}
533
534fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
537where
538 T: PartialOrd + ToPrimitive,
539{
540 let len = data.len();
541
542 match len {
544 0..=2 => return None,
545 3 => {
546 let mut indices = vec![0, 1, 2];
548 let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
549 let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
550 let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
551 return Some((min_val, med_val, max_val));
552 }
553 _ => {}
554 }
555
556 let mut indices: Vec<usize> = (0..len).collect();
558
559 let k = len / 4;
561 let remainder = len % 4;
562
563 match remainder {
565 0 => {
566 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
568 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
569 let q1 = f64::midpoint(q1_low, q1_high);
570
571 let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
572 let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
573 let q2 = f64::midpoint(q2_low, q2_high);
574
575 let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
576 let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
577 let q3 = f64::midpoint(q3_low, q3_high);
578
579 Some((q1, q2, q3))
580 }
581 1 => {
582 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
584 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
585 let q1 = f64::midpoint(q1_low, q1_high);
586
587 let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
588
589 let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
590 let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
591 let q3 = f64::midpoint(q3_low, q3_high);
592
593 Some((q1, q2, q3))
594 }
595 2 => {
596 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
598
599 let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
600 let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
601 let q2 = f64::midpoint(q2_low, q2_high);
602
603 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
604
605 Some((q1, q2, q3))
606 }
607 _ => {
608 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
610 let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
611 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
612
613 Some((q1, q2, q3))
614 }
615 }
616}
617
618fn mode_on_sorted<T, I>(it: I) -> Option<T>
619where
620 T: PartialOrd,
621 I: Iterator<Item = T>,
622{
623 use std::cmp::Ordering;
624
625 let (mut mode, mut next) = (None, None);
632 let (mut mode_count, mut next_count) = (0usize, 0usize);
633 for x in it {
634 if mode.as_ref() == Some(&x) {
635 mode_count += 1;
636 } else if next.as_ref() == Some(&x) {
637 next_count += 1;
638 } else {
639 next = Some(x);
640 next_count = 0;
641 }
642
643 match next_count.cmp(&mode_count) {
644 Ordering::Greater => {
645 mode = next;
646 mode_count = next_count;
647 next = None;
648 next_count = 0;
649 }
650 Ordering::Equal => {
651 mode = None;
652 mode_count = 0;
653 }
654 Ordering::Less => {}
655 }
656 }
657 mode
658}
659
660#[inline]
693fn modes_and_antimodes_on_sorted<T, I>(
694 mut it: I,
695 size: usize,
696) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
697where
698 T: PartialOrd + Clone,
699 I: Iterator<Item = T>,
700{
701 let Some(first) = it.next() else {
703 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
704 };
705
706 let mut runs: Vec<(T, u32)> =
708 Vec::with_capacity(((size as f64).sqrt() as usize).clamp(16, 1_000));
709
710 let mut current_value = first;
711 let mut current_count = 1;
712 let mut highest_count = 1;
713 let mut lowest_count = u32::MAX;
714
715 for x in it {
717 if x == current_value {
718 current_count += 1;
719 highest_count = highest_count.max(current_count);
720 } else {
721 runs.push((current_value, current_count));
722 lowest_count = lowest_count.min(current_count);
723 current_value = x;
724 current_count = 1;
725 }
726 }
727 runs.push((current_value, current_count));
728 lowest_count = lowest_count.min(current_count);
729
730 if runs.len() == 1 {
732 let (val, count) = runs.pop().unwrap();
733 return ((vec![val], 1, count), (Vec::new(), 0, 0));
734 }
735
736 if highest_count == 1 {
738 let antimodes_count = runs.len().min(10);
739 let total_count = runs.len();
740 let mut antimodes = Vec::with_capacity(antimodes_count);
741 for (val, _) in runs.into_iter().take(antimodes_count) {
742 antimodes.push(val);
743 }
744 return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
746 }
747
748 let estimated_modes = (runs.len() / 10).clamp(1, 10);
752 let estimated_antimodes = 10.min(runs.len());
753
754 let mut modes_result = Vec::with_capacity(estimated_modes);
755 let mut antimodes_result = Vec::with_capacity(estimated_antimodes);
756 let mut mode_count = 0;
757 let mut antimodes_count = 0;
758 let mut antimodes_collected = 0_u32;
759
760 for (val, count) in &runs {
762 if *count == highest_count {
763 modes_result.push(val.clone());
764 mode_count += 1;
765 }
766 if *count == lowest_count {
767 antimodes_count += 1;
768 if antimodes_collected < 10 {
769 antimodes_result.push(val.clone());
770 antimodes_collected += 1;
771 }
772 }
773 }
774
775 (
776 (modes_result, mode_count, highest_count),
777 (antimodes_result, antimodes_count, lowest_count),
778 )
779}
780
781#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
789pub struct Unsorted<T> {
790 sorted: bool,
791 data: Vec<Partial<T>>,
792}
793
794impl<T: PartialOrd> Unsorted<T> {
795 #[inline]
797 #[must_use]
798 pub fn new() -> Unsorted<T> {
799 Default::default()
800 }
801
802 #[allow(clippy::inline_always)]
804 #[inline(always)]
805 pub fn add(&mut self, v: T) {
806 self.sorted = false;
807 self.data.push(Partial(v));
808 }
809
810 #[inline]
812 #[must_use]
813 pub const fn len(&self) -> usize {
814 self.data.len()
815 }
816
817 #[inline]
818 #[must_use]
819 pub const fn is_empty(&self) -> bool {
820 self.data.is_empty()
821 }
822
823 #[inline]
824 fn sort(&mut self) {
825 if !self.sorted {
826 self.data.par_sort_unstable();
827 self.sorted = true;
828 }
829 }
830
831 #[inline]
832 const fn already_sorted(&mut self) {
833 self.sorted = true;
834 }
835
836 #[inline]
838 pub fn add_bulk(&mut self, values: Vec<T>) {
839 self.sorted = false;
840 self.data.reserve(values.len());
841 self.data.extend(values.into_iter().map(Partial));
842 }
843
844 #[inline]
846 pub fn shrink_to_fit(&mut self) {
847 self.data.shrink_to_fit();
848 }
849
850 #[inline]
852 #[must_use]
853 pub fn with_capacity(capacity: usize) -> Self {
854 Unsorted {
855 sorted: true,
856 data: Vec::with_capacity(capacity),
857 }
858 }
859
860 #[inline]
862 pub fn push_ascending(&mut self, value: T) {
863 if let Some(last) = self.data.last() {
864 debug_assert!(last.0 <= value, "Value must be >= than last element");
865 }
866 self.data.push(Partial(value));
867 }
869}
870
871impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
872 #[inline]
873 pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
881 const CHUNK_SIZE: usize = 2048; const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; let len = self.data.len();
885 match len {
886 0 => return 0,
887 1 => return 1,
888 _ => {}
889 }
890
891 if sorted {
892 self.already_sorted();
893 } else {
894 self.sort();
895 }
896
897 let use_parallel = parallel_threshold != 0
898 && (parallel_threshold == 1
899 || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
900
901 if use_parallel {
902 let chunks = self.data.par_chunks(CHUNK_SIZE);
904
905 let chunk_results: Vec<u64> = chunks
907 .map(|chunk| {
908 let mut count = 1; for window in chunk.windows(2) {
911 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
913 count += 1;
914 }
915 }
916 count
917 })
918 .collect();
919
920 let mut total = 0;
922 for (i, &count) in chunk_results.iter().enumerate() {
923 total += count;
924
925 if i > 0 {
927 unsafe {
933 let prev_chunk_end = self.data.get_unchecked((i * CHUNK_SIZE) - 1);
934 let curr_chunk_start = self.data.get_unchecked(i * CHUNK_SIZE);
935 if prev_chunk_end == curr_chunk_start {
936 total -= 1;
937 }
938 }
939 }
940 }
941
942 total
943 } else {
944 let mut count = u64::from(!self.data.is_empty());
949
950 for window in self.data.windows(2) {
951 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
953 count += 1;
954 }
955 }
956 count
957 }
958 }
959}
960
961impl<T: PartialOrd + Clone> Unsorted<T> {
962 #[inline]
964 pub fn mode(&mut self) -> Option<T> {
965 if self.data.is_empty() {
966 return None;
967 }
968 self.sort();
969 mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
970 }
971
972 #[inline]
976 fn modes(&mut self) -> (Vec<T>, usize, u32) {
977 if self.data.is_empty() {
978 return (Vec::new(), 0, 0);
979 }
980 self.sort();
981 modes_and_antimodes_on_sorted(self.data.iter().map(|p| p.0.clone()), self.len()).0
982 }
983
984 #[inline]
987 fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
988 if self.data.is_empty() {
989 return (Vec::new(), 0, 0);
990 }
991 self.sort();
992 modes_and_antimodes_on_sorted(self.data.iter().map(|p| p.0.clone()), self.len()).1
993 }
994
995 #[inline]
998 pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
999 if self.data.is_empty() {
1000 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1001 }
1002 self.sort();
1003 modes_and_antimodes_on_sorted(self.data.iter().map(|p| p.0.clone()), self.len())
1004 }
1005}
1006
1007impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1008 #[inline]
1010 pub fn median(&mut self) -> Option<f64> {
1011 if self.data.is_empty() {
1012 return None;
1013 }
1014 self.sort();
1015 median_on_sorted(&self.data)
1016 }
1017}
1018
1019impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1020 #[inline]
1022 pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1023 if self.data.is_empty() {
1024 return None;
1025 }
1026 if existing_median.is_none() {
1027 self.sort();
1028 }
1029 mad_on_sorted(&self.data, existing_median)
1030 }
1031}
1032
1033impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1034 #[inline]
1039 pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1040 if self.data.is_empty() {
1041 return None;
1042 }
1043 self.sort();
1044 quartiles_on_sorted(&self.data)
1045 }
1046}
1047
1048impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1049 #[inline]
1061 pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1062 if self.data.is_empty() {
1063 return None;
1064 }
1065 let mut data_copy: Vec<Partial<T>> =
1067 self.data.iter().map(|x| Partial(x.0.clone())).collect();
1068 quartiles_with_selection(&mut data_copy)
1069 }
1070}
1071
1072impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1073 #[inline]
1079 #[must_use]
1080 pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1081 if self.data.is_empty() {
1082 return None;
1083 }
1084 quartiles_with_zero_copy_selection(&self.data)
1085 }
1086}
1087
1088impl<T: PartialOrd> Commute for Unsorted<T> {
1089 #[inline]
1090 fn merge(&mut self, mut v: Unsorted<T>) {
1091 if v.is_empty() {
1092 return;
1093 }
1094
1095 self.sorted = false;
1096 self.data.extend(std::mem::take(&mut v.data));
1098 }
1099}
1100
1101impl<T: PartialOrd> Default for Unsorted<T> {
1102 #[inline]
1103 fn default() -> Unsorted<T> {
1104 Unsorted {
1105 data: Vec::with_capacity(10_000),
1106 sorted: true, }
1108 }
1109}
1110
1111impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1112 #[inline]
1113 fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1114 let mut v = Unsorted::new();
1115 v.extend(it);
1116 v
1117 }
1118}
1119
1120impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1121 #[inline]
1122 fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1123 self.sorted = false;
1124 self.data.extend(it.into_iter().map(Partial));
1125 }
1126}
1127
1128fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1129where
1130 T: PartialOrd + Clone,
1131{
1132 let len = data.len();
1133
1134 if len == 0 || percentiles.iter().any(|&p| p > 100) {
1136 return None;
1137 }
1138
1139 let mut unique_percentiles = percentiles.to_vec();
1141 unique_percentiles.sort_unstable();
1142 unique_percentiles.dedup();
1143
1144 let mut results = Vec::with_capacity(unique_percentiles.len());
1145
1146 unsafe {
1150 for &p in &unique_percentiles {
1151 let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1155
1156 let idx = rank.saturating_sub(1);
1158
1159 results.push(data.get_unchecked(idx).0.clone());
1161 }
1162 }
1163
1164 Some(results)
1165}
1166
1167impl<T: PartialOrd + Clone> Unsorted<T> {
1168 #[inline]
1190 pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1191 if self.data.is_empty() {
1192 return None;
1193 }
1194 self.sort();
1195 custom_percentiles_on_sorted(&self.data, percentiles)
1196 }
1197}
1198
1199#[cfg(test)]
1200mod test {
1201 use super::*;
1202
1203 #[test]
1204 fn test_cardinality_empty() {
1205 let mut unsorted: Unsorted<i32> = Unsorted::new();
1206 assert_eq!(unsorted.cardinality(false, 1), 0);
1207 }
1208
1209 #[test]
1210 fn test_cardinality_single_element() {
1211 let mut unsorted = Unsorted::new();
1212 unsorted.add(5);
1213 assert_eq!(unsorted.cardinality(false, 1), 1);
1214 }
1215
1216 #[test]
1217 fn test_cardinality_unique_elements() {
1218 let mut unsorted = Unsorted::new();
1219 unsorted.extend(vec![1, 2, 3, 4, 5]);
1220 assert_eq!(unsorted.cardinality(false, 1), 5);
1221 }
1222
1223 #[test]
1224 fn test_cardinality_duplicate_elements() {
1225 let mut unsorted = Unsorted::new();
1226 unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1227 assert_eq!(unsorted.cardinality(false, 1), 4);
1228 }
1229
1230 #[test]
1231 fn test_cardinality_all_same() {
1232 let mut unsorted = Unsorted::new();
1233 unsorted.extend(vec![1; 100]);
1234 assert_eq!(unsorted.cardinality(false, 1), 1);
1235 }
1236
1237 #[test]
1238 fn test_cardinality_large_range() {
1239 let mut unsorted = Unsorted::new();
1240 unsorted.extend(0..1_000_000);
1241 assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1242 }
1243
1244 #[test]
1245 fn test_cardinality_large_range_sequential() {
1246 let mut unsorted = Unsorted::new();
1247 unsorted.extend(0..1_000_000);
1248 assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1249 }
1250
1251 #[test]
1252 fn test_cardinality_presorted() {
1253 let mut unsorted = Unsorted::new();
1254 unsorted.extend(vec![1, 2, 3, 4, 5]);
1255 unsorted.sort();
1256 assert_eq!(unsorted.cardinality(true, 1), 5);
1257 }
1258
1259 #[test]
1260 fn test_cardinality_float() {
1261 let mut unsorted = Unsorted::new();
1262 unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1263 assert_eq!(unsorted.cardinality(false, 1), 4);
1264 }
1265
1266 #[test]
1267 fn test_cardinality_string() {
1268 let mut unsorted = Unsorted::new();
1269 unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1270 assert_eq!(unsorted.cardinality(false, 1), 3);
1271 }
1272
1273 #[test]
1274 fn test_quartiles_selection_vs_sorted() {
1275 let test_cases = vec![
1277 vec![3, 5, 7, 9],
1278 vec![3, 5, 7],
1279 vec![1, 2, 7, 11],
1280 vec![3, 5, 7, 9, 12],
1281 vec![2, 2, 3, 8, 10],
1282 vec![3, 5, 7, 9, 12, 20],
1283 vec![0, 2, 4, 8, 10, 11],
1284 vec![3, 5, 7, 9, 12, 20, 21],
1285 vec![1, 5, 6, 6, 7, 10, 19],
1286 ];
1287
1288 for test_case in test_cases {
1289 let mut unsorted1 = Unsorted::new();
1290 let mut unsorted2 = Unsorted::new();
1291 let mut unsorted3 = Unsorted::new();
1292 unsorted1.extend(test_case.clone());
1293 unsorted2.extend(test_case.clone());
1294 unsorted3.extend(test_case.clone());
1295
1296 let result_sorted = unsorted1.quartiles();
1297 let result_selection = unsorted2.quartiles_with_selection();
1298 let result_zero_copy = unsorted3.quartiles_zero_copy();
1299
1300 assert_eq!(
1301 result_sorted, result_selection,
1302 "Selection mismatch for test case: {:?}",
1303 test_case
1304 );
1305 assert_eq!(
1306 result_sorted, result_zero_copy,
1307 "Zero-copy mismatch for test case: {:?}",
1308 test_case
1309 );
1310 }
1311 }
1312
1313 #[test]
1314 fn test_quartiles_with_selection_small() {
1315 let mut unsorted: Unsorted<i32> = Unsorted::new();
1317 assert_eq!(unsorted.quartiles_with_selection(), None);
1318
1319 let mut unsorted = Unsorted::new();
1320 unsorted.extend(vec![1, 2]);
1321 assert_eq!(unsorted.quartiles_with_selection(), None);
1322
1323 let mut unsorted = Unsorted::new();
1324 unsorted.extend(vec![1, 2, 3]);
1325 assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1326 }
1327
1328 #[test]
1329 fn test_quickselect() {
1330 let data = vec![
1331 Partial(3),
1332 Partial(1),
1333 Partial(4),
1334 Partial(1),
1335 Partial(5),
1336 Partial(9),
1337 Partial(2),
1338 Partial(6),
1339 ];
1340
1341 assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1343 assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1344 assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1345
1346 let mut empty: Vec<Partial<i32>> = vec![];
1348 assert_eq!(quickselect(&mut empty, 0), None);
1349
1350 let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1351 assert_eq!(quickselect(&mut data, 10), None); }
1353
1354 #[test]
1355 fn median_stream() {
1356 assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1357 assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1358 }
1359
1360 #[test]
1361 fn mad_stream() {
1362 assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1363 assert_eq!(
1364 mad(
1365 vec![
1366 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1367 50, 61, 39, 88, 5, 13, 64
1368 ]
1369 .into_iter(),
1370 None
1371 ),
1372 Some(16.0)
1373 );
1374 }
1375
1376 #[test]
1377 fn mad_stream_precalc_median() {
1378 let data = vec![3usize, 5, 7, 9].into_iter();
1379 let median1 = median(data.clone());
1380 assert_eq!(mad(data, median1), Some(2.0));
1381
1382 let data2 = vec![
1383 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1384 39, 88, 5, 13, 64,
1385 ]
1386 .into_iter();
1387 let median2 = median(data2.clone());
1388 assert_eq!(mad(data2, median2), Some(16.0));
1389 }
1390
1391 #[test]
1392 fn mode_stream() {
1393 assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1394 assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1395 assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1396 assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1397 assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1398 }
1399
1400 #[test]
1401 fn median_floats() {
1402 assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1403 assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1404 }
1405
1406 #[test]
1407 fn mode_floats() {
1408 assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1409 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1410 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1411 assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1412 assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1413 }
1414
1415 #[test]
1416 fn modes_stream() {
1417 assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1418 assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1419 assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1420 assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1421 assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1422 let vec: Vec<u32> = vec![];
1423 assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1424 }
1425
1426 #[test]
1427 fn modes_floats() {
1428 assert_eq!(
1429 modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1430 (vec![], 0, 0)
1431 );
1432 assert_eq!(
1433 modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1434 (vec![3.0], 1, 4)
1435 );
1436 assert_eq!(
1437 modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1438 (vec![3.0, 4.0], 2, 2)
1439 );
1440 assert_eq!(
1441 modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1442 (vec![1.0, 3.0], 2, 2)
1443 );
1444 }
1445
1446 #[test]
1447 fn antimodes_stream() {
1448 assert_eq!(
1449 antimodes(vec![3usize, 5, 7, 9].into_iter()),
1450 (vec![3, 5, 7, 9], 4, 1)
1451 );
1452 assert_eq!(
1453 antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1454 (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1455 );
1456 assert_eq!(
1457 antimodes(vec![1usize, 3, 3, 3].into_iter()),
1458 (vec![1], 1, 1)
1459 );
1460 assert_eq!(
1461 antimodes(vec![3usize, 3, 4, 4].into_iter()),
1462 (vec![3, 4], 2, 2)
1463 );
1464 assert_eq!(
1465 antimodes(
1466 vec![
1467 3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1468 14, 14, 15, 15
1469 ]
1470 .into_iter()
1471 ),
1472 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1474 );
1475 assert_eq!(
1476 antimodes(
1477 vec![
1478 3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
1479 14, 14, 15, 15
1480 ]
1481 .into_iter()
1482 ),
1483 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
1484 );
1485 assert_eq!(
1486 antimodes(vec![3usize, 3, 3, 4].into_iter()),
1487 (vec![4], 1, 1)
1488 );
1489 assert_eq!(
1490 antimodes(vec![4usize, 3, 3, 3].into_iter()),
1491 (vec![4], 1, 1)
1492 );
1493 assert_eq!(
1494 antimodes(vec![1usize, 1, 2, 2].into_iter()),
1495 (vec![1, 2], 2, 2)
1496 );
1497 let vec: Vec<u32> = vec![];
1498 assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
1499 }
1500
1501 #[test]
1502 fn antimodes_floats() {
1503 assert_eq!(
1504 antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1505 (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
1506 );
1507 assert_eq!(
1508 antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1509 (vec![], 0, 0)
1510 );
1511 assert_eq!(
1512 antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1513 (vec![3.0, 4.0], 2, 2)
1514 );
1515 assert_eq!(
1516 antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1517 (vec![2.0], 1, 1)
1518 );
1519 }
1520
1521 #[test]
1522 fn test_custom_percentiles() {
1523 let mut unsorted: Unsorted<i32> = Unsorted::new();
1525 unsorted.extend(1..=11); let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
1528 assert_eq!(result, vec![3, 6, 9]);
1529
1530 let mut str_data = Unsorted::new();
1532 str_data.extend(vec!["a", "b", "c", "d", "e"]);
1533 let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
1534 assert_eq!(result, vec!["a", "b", "c", "d"]);
1535
1536 let mut char_data = Unsorted::new();
1538 char_data.extend('a'..='e');
1539 let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
1540 assert_eq!(result, vec!['b', 'c', 'd']);
1541
1542 let mut float_data = Unsorted::new();
1544 float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
1545 let result = float_data
1546 .custom_percentiles(&[10, 30, 50, 70, 90])
1547 .unwrap();
1548 assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
1549
1550 let result = float_data.custom_percentiles(&[]).unwrap();
1552 assert_eq!(result, Vec::<f64>::new());
1553
1554 let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
1556 assert_eq!(result, vec![5.5]);
1557
1558 let result = float_data.custom_percentiles(&[0, 100]).unwrap();
1560 assert_eq!(result, vec![1.1, 9.9]);
1561
1562 let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
1564 assert_eq!(result, vec![3.3, 5.5, 7.7]); let mut single = Unsorted::new();
1568 single.add(42);
1569 let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
1570 assert_eq!(result, vec![42, 42, 42]);
1571 }
1572
1573 #[test]
1574 fn quartiles_stream() {
1575 assert_eq!(
1576 quartiles(vec![3usize, 5, 7].into_iter()),
1577 Some((3., 5., 7.))
1578 );
1579 assert_eq!(
1580 quartiles(vec![3usize, 5, 7, 9].into_iter()),
1581 Some((4., 6., 8.))
1582 );
1583 assert_eq!(
1584 quartiles(vec![1usize, 2, 7, 11].into_iter()),
1585 Some((1.5, 4.5, 9.))
1586 );
1587 assert_eq!(
1588 quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
1589 Some((4., 7., 10.5))
1590 );
1591 assert_eq!(
1592 quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
1593 Some((2., 3., 9.))
1594 );
1595 assert_eq!(
1596 quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
1597 Some((5., 8., 12.))
1598 );
1599 assert_eq!(
1600 quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
1601 Some((2., 6., 10.))
1602 );
1603 assert_eq!(
1604 quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
1605 Some((5., 9., 20.))
1606 );
1607 assert_eq!(
1608 quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
1609 Some((5., 6., 10.))
1610 );
1611 }
1612
1613 #[test]
1614 fn quartiles_floats() {
1615 assert_eq!(
1616 quartiles(vec![3_f64, 5., 7.].into_iter()),
1617 Some((3., 5., 7.))
1618 );
1619 assert_eq!(
1620 quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
1621 Some((4., 6., 8.))
1622 );
1623 assert_eq!(
1624 quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
1625 Some((4., 7., 10.5))
1626 );
1627 assert_eq!(
1628 quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
1629 Some((5., 8., 12.))
1630 );
1631 assert_eq!(
1632 quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
1633 Some((5., 9., 20.))
1634 );
1635 }
1636
1637 #[test]
1638 fn test_quartiles_zero_copy_small() {
1639 let unsorted: Unsorted<i32> = Unsorted::new();
1641 assert_eq!(unsorted.quartiles_zero_copy(), None);
1642
1643 let mut unsorted = Unsorted::new();
1644 unsorted.extend(vec![1, 2]);
1645 assert_eq!(unsorted.quartiles_zero_copy(), None);
1646
1647 let mut unsorted = Unsorted::new();
1648 unsorted.extend(vec![1, 2, 3]);
1649 assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
1650
1651 let mut unsorted = Unsorted::new();
1653 unsorted.extend(vec![3, 5, 7, 9]);
1654 assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
1655 }
1656}
1657
1658#[cfg(test)]
1659mod bench {
1660 use super::*;
1661 use std::time::Instant;
1662
1663 #[test]
1664 #[ignore] fn comprehensive_quartiles_benchmark() {
1666 let data_sizes = vec![
1668 1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
1669 ];
1670
1671 println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
1672
1673 for size in data_sizes {
1674 println!("--- Testing with {} elements ---", size);
1675
1676 let test_patterns = vec![
1678 ("Random", generate_random_data(size)),
1679 ("Reverse Sorted", {
1680 let mut v = Vec::with_capacity(size);
1681 for x in (0..size).rev() {
1682 v.push(x as i32);
1683 }
1684 v
1685 }),
1686 ("Already Sorted", {
1687 let mut v = Vec::with_capacity(size);
1688 for x in 0..size {
1689 v.push(x as i32);
1690 }
1691 v
1692 }),
1693 ("Many Duplicates", {
1694 let mut v = Vec::with_capacity(size);
1696 let chunk_size = size / 100;
1697 for i in 0..100 {
1698 v.extend(std::iter::repeat(i as i32).take(chunk_size));
1699 }
1700 v.extend(std::iter::repeat(0).take(size - v.len()));
1702 v
1703 }),
1704 ];
1705
1706 for (pattern_name, test_data) in test_patterns {
1707 println!("\n Pattern: {}", pattern_name);
1708
1709 let mut unsorted1 = Unsorted::new();
1711 unsorted1.extend(test_data.clone());
1712
1713 let start = Instant::now();
1714 let result_sorted = unsorted1.quartiles();
1715 let sorted_time = start.elapsed();
1716
1717 let mut unsorted2 = Unsorted::new();
1719 unsorted2.extend(test_data.clone());
1720
1721 let start = Instant::now();
1722 let result_selection = unsorted2.quartiles_with_selection();
1723 let selection_time = start.elapsed();
1724
1725 let mut unsorted3 = Unsorted::new();
1727 unsorted3.extend(test_data);
1728
1729 let start = Instant::now();
1730 let result_zero_copy = unsorted3.quartiles_zero_copy();
1731 let zero_copy_time = start.elapsed();
1732
1733 assert_eq!(result_sorted, result_selection);
1735 assert_eq!(result_sorted, result_zero_copy);
1736
1737 let selection_speedup =
1738 sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
1739 let zero_copy_speedup =
1740 sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
1741
1742 println!(" Sorting: {:>12?}", sorted_time);
1743 println!(
1744 " Selection: {:>12?} (speedup: {:.2}x)",
1745 selection_time, selection_speedup
1746 );
1747 println!(
1748 " Zero-copy: {:>12?} (speedup: {:.2}x)",
1749 zero_copy_time, zero_copy_speedup
1750 );
1751
1752 let best_algorithm =
1753 if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
1754 "ZERO-COPY"
1755 } else if selection_speedup > 1.0 {
1756 "SELECTION"
1757 } else {
1758 "SORTING"
1759 };
1760 println!(" Best: {}", best_algorithm);
1761 }
1762
1763 println!(); }
1765 }
1766
1767 fn generate_random_data(size: usize) -> Vec<i32> {
1769 let mut rng = 1234567u64;
1771 let mut vec = Vec::with_capacity(size);
1772 for _ in 0..size {
1773 rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
1774 vec.push((rng >> 16) as i32);
1775 }
1776 vec
1777 }
1778
1779 #[test]
1780 #[ignore] fn find_selection_threshold() {
1782 println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
1783
1784 let mut found_threshold = None;
1786 let test_sizes = vec![
1787 1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
1788 15_000_000, 20_000_000, 25_000_000, 30_000_000,
1789 ];
1790
1791 for size in test_sizes {
1792 println!("Testing size: {}", size);
1793
1794 let test_data = generate_random_data(size);
1796
1797 let iterations = 3;
1799 let mut sorting_total = 0u128;
1800 let mut selection_total = 0u128;
1801 let mut zero_copy_total = 0u128;
1802
1803 for i in 0..iterations {
1804 println!(" Iteration {}/{}", i + 1, iterations);
1805
1806 let mut unsorted1 = Unsorted::new();
1808 unsorted1.extend(test_data.clone());
1809
1810 let start = Instant::now();
1811 let _result_sorted = unsorted1.quartiles();
1812 sorting_total += start.elapsed().as_nanos();
1813
1814 let mut unsorted2 = Unsorted::new();
1816 unsorted2.extend(test_data.clone());
1817
1818 let start = Instant::now();
1819 let _result_selection = unsorted2.quartiles_with_selection();
1820 selection_total += start.elapsed().as_nanos();
1821
1822 let mut unsorted3 = Unsorted::new();
1824 unsorted3.extend(test_data.clone());
1825
1826 let start = Instant::now();
1827 let _result_zero_copy = unsorted3.quartiles_zero_copy();
1828 zero_copy_total += start.elapsed().as_nanos();
1829 }
1830
1831 let avg_sorting = sorting_total / iterations as u128;
1832 let avg_selection = selection_total / iterations as u128;
1833 let avg_zero_copy = zero_copy_total / iterations as u128;
1834 let selection_speedup = avg_sorting as f64 / avg_selection as f64;
1835 let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
1836
1837 println!(
1838 " Average sorting: {:>12.2}ms",
1839 avg_sorting as f64 / 1_000_000.0
1840 );
1841 println!(
1842 " Average selection: {:>12.2}ms (speedup: {:.2}x)",
1843 avg_selection as f64 / 1_000_000.0,
1844 selection_speedup
1845 );
1846 println!(
1847 " Average zero-copy: {:>12.2}ms (speedup: {:.2}x)",
1848 avg_zero_copy as f64 / 1_000_000.0,
1849 zero_copy_speedup
1850 );
1851
1852 if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
1853 found_threshold = Some(size);
1854 let best_method = if zero_copy_speedup > selection_speedup {
1855 "Zero-copy"
1856 } else {
1857 "Selection"
1858 };
1859 println!(
1860 " *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
1861 best_method, size
1862 );
1863 }
1864
1865 println!();
1866 }
1867
1868 match found_threshold {
1869 Some(threshold) => println!(
1870 "🎯 Selection algorithm becomes faster at approximately {} elements",
1871 threshold
1872 ),
1873 None => println!("❌ Selection algorithm did not become faster in the tested range"),
1874 }
1875 }
1876
1877 #[test]
1878 #[ignore] fn benchmark_different_data_types() {
1880 println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
1881
1882 let size = 5_000_000; println!("Testing with f64 data:");
1886 let float_data: Vec<f64> = generate_random_data(size)
1887 .into_iter()
1888 .map(|x| x as f64 / 1000.0)
1889 .collect();
1890
1891 let mut unsorted1 = Unsorted::new();
1892 unsorted1.extend(float_data.clone());
1893 let start = Instant::now();
1894 let _result = unsorted1.quartiles();
1895 let sorting_time = start.elapsed();
1896
1897 let mut unsorted2 = Unsorted::new();
1898 unsorted2.extend(float_data.clone());
1899 let start = Instant::now();
1900 let _result = unsorted2.quartiles_with_selection();
1901 let selection_time = start.elapsed();
1902
1903 let mut unsorted3 = Unsorted::new();
1904 unsorted3.extend(float_data);
1905 let start = Instant::now();
1906 let _result = unsorted3.quartiles_zero_copy();
1907 let zero_copy_time = start.elapsed();
1908
1909 println!(" Sorting: {:?}", sorting_time);
1910 println!(" Selection: {:?}", selection_time);
1911 println!(" Zero-copy: {:?}", zero_copy_time);
1912 println!(
1913 " Selection Speedup: {:.2}x",
1914 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
1915 );
1916 println!(
1917 " Zero-copy Speedup: {:.2}x\n",
1918 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
1919 );
1920
1921 println!("Testing with i64 data:");
1923 let int64_data: Vec<i64> = generate_random_data(size)
1924 .into_iter()
1925 .map(|x| x as i64 * 1000)
1926 .collect();
1927
1928 let mut unsorted1 = Unsorted::new();
1929 unsorted1.extend(int64_data.clone());
1930 let start = Instant::now();
1931 let _result = unsorted1.quartiles();
1932 let sorting_time = start.elapsed();
1933
1934 let mut unsorted2 = Unsorted::new();
1935 unsorted2.extend(int64_data.clone());
1936 let start = Instant::now();
1937 let _result = unsorted2.quartiles_with_selection();
1938 let selection_time = start.elapsed();
1939
1940 let mut unsorted3 = Unsorted::new();
1941 unsorted3.extend(int64_data);
1942 let start = Instant::now();
1943 let _result = unsorted3.quartiles_zero_copy();
1944 let zero_copy_time = start.elapsed();
1945
1946 println!(" Sorting: {:?}", sorting_time);
1947 println!(" Selection: {:?}", selection_time);
1948 println!(" Zero-copy: {:?}", zero_copy_time);
1949 println!(
1950 " Selection Speedup: {:.2}x",
1951 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
1952 );
1953 println!(
1954 " Zero-copy Speedup: {:.2}x",
1955 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
1956 );
1957 }
1958}