1use num_traits::ToPrimitive;
2use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
3use rayon::prelude::ParallelSlice;
4use rayon::slice::ParallelSliceMut;
5
6use serde::{Deserialize, Serialize};
7
8use {crate::Commute, crate::Partial};
9
10const PARALLEL_THRESHOLD: usize = 10_000;
11
12pub fn median<I>(it: I) -> Option<f64>
16where
17 I: Iterator,
18 <I as Iterator>::Item: PartialOrd + ToPrimitive,
19{
20 it.collect::<Unsorted<_>>().median()
21}
22
23pub fn mad<I>(it: I, precalc_median: Option<f64>) -> Option<f64>
25where
26 I: Iterator,
27 <I as Iterator>::Item: PartialOrd + ToPrimitive,
28{
29 it.collect::<Unsorted<_>>().mad(precalc_median)
30}
31
32pub fn quartiles<I>(it: I) -> Option<(f64, f64, f64)>
36where
37 I: Iterator,
38 <I as Iterator>::Item: PartialOrd + ToPrimitive,
39{
40 it.collect::<Unsorted<_>>().quartiles()
41}
42
43pub fn mode<T, I>(it: I) -> Option<T>
49where
50 T: PartialOrd + Clone,
51 I: Iterator<Item = T>,
52{
53 it.collect::<Unsorted<T>>().mode()
54}
55
56pub fn modes<T, I>(it: I) -> (Vec<T>, usize, u32)
74where
75 T: PartialOrd + Clone,
76 I: Iterator<Item = T>,
77{
78 it.collect::<Unsorted<T>>().modes()
79}
80
81pub fn antimodes<T, I>(it: I) -> (Vec<T>, usize, u32)
104where
105 T: PartialOrd + Clone,
106 I: Iterator<Item = T>,
107{
108 let (antimodes_result, antimodes_count, antimodes_occurrences) =
109 it.collect::<Unsorted<T>>().antimodes();
110 (antimodes_result, antimodes_count, antimodes_occurrences)
111}
112
113pub fn gini<I>(it: I) -> Option<f64>
120where
121 I: Iterator,
122 <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
123{
124 it.collect::<Unsorted<_>>().gini()
125}
126
127pub fn kurtosis<I>(it: I) -> Option<f64>
135where
136 I: Iterator,
137 <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
138{
139 it.collect::<Unsorted<_>>().kurtosis()
140}
141
142pub fn percentile_rank<I, V>(it: I, value: V) -> Option<f64>
149where
150 I: Iterator,
151 <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
152 V: PartialOrd + ToPrimitive,
153{
154 it.collect::<Unsorted<_>>().percentile_rank(value)
155}
156
157pub fn atkinson<I>(it: I, epsilon: f64) -> Option<f64>
165where
166 I: Iterator,
167 <I as Iterator>::Item: PartialOrd + ToPrimitive + Sync,
168{
169 it.collect::<Unsorted<_>>().atkinson(epsilon)
170}
171
172fn median_on_sorted<T>(data: &[T]) -> Option<f64>
173where
174 T: PartialOrd + ToPrimitive,
175{
176 Some(match data.len() {
177 0 => return None,
179 1 => data.first()?.to_f64()?,
181 len if len.is_multiple_of(2) => {
183 let idx = len / 2;
184 let v1 = unsafe { data.get_unchecked(idx - 1) }.to_f64()?;
187 let v2 = unsafe { data.get_unchecked(idx) }.to_f64()?;
188 f64::midpoint(v1, v2)
189 }
190 len => unsafe { data.get_unchecked(len / 2) }.to_f64()?,
193 })
194}
195
196fn mad_on_sorted<T>(data: &[T], precalc_median: Option<f64>) -> Option<f64>
197where
198 T: Sync + PartialOrd + ToPrimitive,
199{
200 if data.is_empty() {
201 return None;
202 }
203 let median_obs = precalc_median.unwrap_or_else(|| median_on_sorted(data).unwrap());
204
205 let mut abs_diff_vec = if data.len() < PARALLEL_THRESHOLD {
207 let mut vec = Vec::with_capacity(data.len());
209 for x in data {
210 vec.push((median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs());
211 }
212 vec
213 } else {
214 data.par_iter()
216 .map(|x| (median_obs - unsafe { x.to_f64().unwrap_unchecked() }).abs())
217 .collect()
218 };
219
220 if abs_diff_vec.len() < PARALLEL_THRESHOLD {
222 abs_diff_vec.sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
223 } else {
224 abs_diff_vec.par_sort_unstable_by(|a, b| unsafe { a.partial_cmp(b).unwrap_unchecked() });
225 }
226 median_on_sorted(&abs_diff_vec)
227}
228
229fn gini_on_sorted<T>(data: &[Partial<T>]) -> Option<f64>
230where
231 T: Sync + PartialOrd + ToPrimitive,
232{
233 let len = data.len();
234
235 if len == 0 {
237 return None;
238 }
239
240 if len == 1 {
242 return Some(0.0);
243 }
244
245 let (sum, weighted_sum) = if len < PARALLEL_THRESHOLD {
247 let mut sum = 0.0;
249 let mut weighted_sum = 0.0;
250
251 for (i, x) in data.iter().enumerate() {
252 let val = x.0.to_f64()?;
253 sum += val;
254 weighted_sum += (i + 1) as f64 * val;
255 }
256
257 (sum, weighted_sum)
258 } else {
259 let sum: f64 = data
262 .par_iter()
263 .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
264 .sum();
265
266 let weighted_sum: f64 = data
268 .par_iter()
269 .enumerate()
270 .map(|(i, x)| {
271 let val = unsafe { x.0.to_f64().unwrap_unchecked() };
272 (i + 1) as f64 * val
273 })
274 .sum();
275
276 (sum, weighted_sum)
277 };
278
279 if sum == 0.0 {
281 return None;
282 }
283
284 let n = len as f64;
288 let gini = (2.0 * weighted_sum) / (n * sum) - (n + 1.0) / n;
289
290 Some(gini)
291}
292
293fn kurtosis_on_sorted<T>(data: &[Partial<T>]) -> Option<f64>
294where
295 T: Sync + PartialOrd + ToPrimitive,
296{
297 let len = data.len();
298
299 if len < 4 {
301 return None;
302 }
303
304 let sum: f64 = if len < PARALLEL_THRESHOLD {
306 let mut sum = 0.0;
307 for x in data {
308 sum += x.0.to_f64()?;
309 }
310 sum
311 } else {
312 data.par_iter()
313 .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
314 .sum()
315 };
316
317 let mean = sum / len as f64;
318
319 let (variance_sum, fourth_power_sum) = if len < PARALLEL_THRESHOLD {
321 let mut variance_sum = 0.0;
322 let mut fourth_power_sum = 0.0;
323
324 for x in data {
325 let val = x.0.to_f64()?;
326 let diff = val - mean;
327 let diff_sq = diff * diff;
328 variance_sum += diff_sq;
329 fourth_power_sum += diff_sq * diff_sq;
330 }
331
332 (variance_sum, fourth_power_sum)
333 } else {
334 let variance_sum: f64 = data
335 .par_iter()
336 .map(|x| {
337 let val = unsafe { x.0.to_f64().unwrap_unchecked() };
338 let diff = val - mean;
339 diff * diff
340 })
341 .sum();
342
343 let fourth_power_sum: f64 = data
344 .par_iter()
345 .map(|x| {
346 let val = unsafe { x.0.to_f64().unwrap_unchecked() };
347 let diff = val - mean;
348 let diff_sq = diff * diff;
349 diff_sq * diff_sq
350 })
351 .sum();
352
353 (variance_sum, fourth_power_sum)
354 };
355
356 let variance = variance_sum / len as f64;
357
358 if variance == 0.0 {
360 return None;
361 }
362
363 let n = len as f64;
364 let variance_sq = variance * variance;
365
366 let kurtosis = (n * (n + 1.0) * fourth_power_sum)
369 / ((n - 1.0) * (n - 2.0) * (n - 3.0) * variance_sq)
370 - 3.0 * (n - 1.0) * (n - 1.0) / ((n - 2.0) * (n - 3.0));
371
372 Some(kurtosis)
373}
374
375fn percentile_rank_on_sorted<T, V>(data: &[Partial<T>], value: &V) -> Option<f64>
376where
377 T: PartialOrd + ToPrimitive,
378 V: PartialOrd + ToPrimitive,
379{
380 let len = data.len();
381
382 if len == 0 {
383 return None;
384 }
385
386 let value_f64 = value.to_f64()?;
387
388 let count_leq = data.binary_search_by(|x| {
391 x.0.to_f64()
392 .unwrap_or(f64::NAN)
393 .partial_cmp(&value_f64)
394 .unwrap_or(std::cmp::Ordering::Less)
395 });
396
397 let count = match count_leq {
398 Ok(idx) => {
399 let mut count = idx + 1;
401 for x in data.iter().skip(idx + 1) {
403 if let Some(x_val) = x.0.to_f64() {
404 if (x_val - value_f64).abs() < 1e-9 {
405 count += 1;
406 } else {
407 break;
408 }
409 } else {
410 break;
411 }
412 }
413 count
414 }
415 Err(idx) => idx, };
417
418 Some((count as f64 / len as f64) * 100.0)
420}
421
422fn atkinson_on_sorted<T>(data: &[Partial<T>], epsilon: f64) -> Option<f64>
423where
424 T: Sync + PartialOrd + ToPrimitive,
425{
426 let len = data.len();
427
428 if len == 0 {
430 return None;
431 }
432
433 if len == 1 {
435 return Some(0.0);
436 }
437
438 if epsilon < 0.0 {
440 return None;
441 }
442
443 let sum: f64 = if len < PARALLEL_THRESHOLD {
445 let mut sum = 0.0;
446 for x in data {
447 sum += x.0.to_f64()?;
448 }
449 sum
450 } else {
451 data.par_iter()
452 .map(|x| unsafe { x.0.to_f64().unwrap_unchecked() })
453 .sum()
454 };
455
456 let mean = sum / len as f64;
457
458 if mean == 0.0 {
460 return None;
461 }
462
463 if (epsilon - 1.0).abs() < 1e-10 {
465 let geometric_sum: f64 = if len < PARALLEL_THRESHOLD {
467 let mut sum = 0.0;
468 for x in data {
469 let val = x.0.to_f64()?;
470 if val <= 0.0 {
471 return None;
473 }
474 sum += val.ln();
475 }
476 sum
477 } else {
478 data.par_iter()
479 .map(|x| {
480 let val = unsafe { x.0.to_f64().unwrap_unchecked() };
481 if val <= 0.0 {
482 return f64::NAN;
483 }
484 val.ln()
485 })
486 .sum()
487 };
488
489 if geometric_sum.is_nan() {
490 return None;
491 }
492
493 let geometric_mean = (geometric_sum / len as f64).exp();
494 return Some(1.0 - geometric_mean / mean);
495 }
496
497 let exponent = 1.0 - epsilon;
500
501 let sum_powered: f64 = if len < PARALLEL_THRESHOLD {
502 let mut sum = 0.0;
503 for x in data {
504 let val = x.0.to_f64()?;
505 if val < 0.0 {
506 return None;
508 }
509 let ratio = val / mean;
510 sum += ratio.powf(exponent);
511 }
512 sum
513 } else {
514 data.par_iter()
515 .map(|x| {
516 let val = unsafe { x.0.to_f64().unwrap_unchecked() };
517 if val < 0.0 {
518 return f64::NAN;
519 }
520 let ratio = val / mean;
521 ratio.powf(exponent)
522 })
523 .sum()
524 };
525
526 if sum_powered.is_nan() || sum_powered <= 0.0 {
527 return None;
528 }
529
530 let atkinson = 1.0 - (sum_powered / len as f64).powf(1.0 / exponent);
531 Some(atkinson)
532}
533
534fn quickselect<T>(data: &mut [Partial<T>], k: usize) -> Option<&T>
537where
538 T: PartialOrd,
539{
540 if data.is_empty() || k >= data.len() {
541 return None;
542 }
543
544 let mut left = 0;
545 let mut right = data.len() - 1;
546
547 loop {
548 if left == right {
549 return Some(&data[left].0);
550 }
551
552 let pivot_idx = median_of_three_pivot(data, left, right);
554 let pivot_idx = partition(data, left, right, pivot_idx);
555
556 match k.cmp(&pivot_idx) {
557 std::cmp::Ordering::Equal => return Some(&data[pivot_idx].0),
558 std::cmp::Ordering::Less => right = pivot_idx - 1,
559 std::cmp::Ordering::Greater => left = pivot_idx + 1,
560 }
561 }
562}
563
564fn quickselect_by_index<'a, T>(
567 data: &'a [Partial<T>],
568 indices: &mut [usize],
569 k: usize,
570) -> Option<&'a T>
571where
572 T: PartialOrd,
573{
574 if data.is_empty() || indices.is_empty() || k >= indices.len() {
575 return None;
576 }
577
578 let mut left = 0;
579 let mut right = indices.len() - 1;
580
581 loop {
582 if left == right {
583 return Some(&data[indices[left]].0);
584 }
585
586 let pivot_idx = median_of_three_pivot_by_index(data, indices, left, right);
588 let pivot_idx = partition_by_index(data, indices, left, right, pivot_idx);
589
590 match k.cmp(&pivot_idx) {
591 std::cmp::Ordering::Equal => return Some(&data[indices[pivot_idx]].0),
592 std::cmp::Ordering::Less => right = pivot_idx - 1,
593 std::cmp::Ordering::Greater => left = pivot_idx + 1,
594 }
595 }
596}
597
598fn median_of_three_pivot<T>(data: &[Partial<T>], left: usize, right: usize) -> usize
600where
601 T: PartialOrd,
602{
603 let mid = left + (right - left) / 2;
604
605 if data[left] <= data[mid] {
606 if data[mid] <= data[right] {
607 mid
608 } else if data[left] <= data[right] {
609 right
610 } else {
611 left
612 }
613 } else if data[left] <= data[right] {
614 left
615 } else if data[mid] <= data[right] {
616 right
617 } else {
618 mid
619 }
620}
621
622fn median_of_three_pivot_by_index<T>(
624 data: &[Partial<T>],
625 indices: &[usize],
626 left: usize,
627 right: usize,
628) -> usize
629where
630 T: PartialOrd,
631{
632 let mid = left + (right - left) / 2;
633
634 if data[indices[left]] <= data[indices[mid]] {
635 if data[indices[mid]] <= data[indices[right]] {
636 mid
637 } else if data[indices[left]] <= data[indices[right]] {
638 right
639 } else {
640 left
641 }
642 } else if data[indices[left]] <= data[indices[right]] {
643 left
644 } else if data[indices[mid]] <= data[indices[right]] {
645 right
646 } else {
647 mid
648 }
649}
650
651fn partition<T>(data: &mut [Partial<T>], left: usize, right: usize, pivot_idx: usize) -> usize
653where
654 T: PartialOrd,
655{
656 data.swap(pivot_idx, right);
658 let mut store_idx = left;
659
660 for i in left..right {
663 if unsafe { data.get_unchecked(i) <= data.get_unchecked(right) } {
666 data.swap(i, store_idx);
667 store_idx += 1;
668 }
669 }
670
671 data.swap(store_idx, right);
673 store_idx
674}
675
676fn partition_by_index<T>(
678 data: &[Partial<T>],
679 indices: &mut [usize],
680 left: usize,
681 right: usize,
682 pivot_idx: usize,
683) -> usize
684where
685 T: PartialOrd,
686{
687 indices.swap(pivot_idx, right);
689 let mut store_idx = left;
690
691 let pivot_idx_cached = unsafe { *indices.get_unchecked(right) };
695 let pivot_val = unsafe { data.get_unchecked(pivot_idx_cached) };
696
697 let mut elem_idx: usize;
699 for i in left..right {
700 elem_idx = unsafe { *indices.get_unchecked(i) };
702 if unsafe { data.get_unchecked(elem_idx) <= pivot_val } {
703 indices.swap(i, store_idx);
704 store_idx += 1;
705 }
706 }
707
708 indices.swap(store_idx, right);
710 store_idx
711}
712
713fn quartiles_on_sorted<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
717where
718 T: PartialOrd + ToPrimitive,
719{
720 let len = data.len();
721
722 match len {
724 0..=2 => return None,
725 3 => {
726 return Some(
727 unsafe {
729 (
730 data.get_unchecked(0).0.to_f64()?,
731 data.get_unchecked(1).0.to_f64()?,
732 data.get_unchecked(2).0.to_f64()?,
733 )
734 },
735 );
736 }
737 _ => {}
738 }
739
740 let k = len / 4;
742 let remainder = len % 4;
743
744 unsafe {
747 Some(match remainder {
748 0 => {
749 let q1 = f64::midpoint(
760 data.get_unchecked(k - 1).0.to_f64()?,
761 data.get_unchecked(k).0.to_f64()?,
762 );
763 let q2 = f64::midpoint(
764 data.get_unchecked(2 * k - 1).0.to_f64()?,
765 data.get_unchecked(2 * k).0.to_f64()?,
766 );
767 let q3 = f64::midpoint(
768 data.get_unchecked(3 * k - 1).0.to_f64()?,
769 data.get_unchecked(3 * k).0.to_f64()?,
770 );
771 (q1, q2, q3)
772 }
773 1 => {
774 let q1 = f64::midpoint(
785 data.get_unchecked(k - 1).0.to_f64()?,
786 data.get_unchecked(k).0.to_f64()?,
787 );
788 let q2 = data.get_unchecked(2 * k).0.to_f64()?;
789 let q3 = f64::midpoint(
790 data.get_unchecked(3 * k).0.to_f64()?,
791 data.get_unchecked(3 * k + 1).0.to_f64()?,
792 );
793 (q1, q2, q3)
794 }
795 2 => {
796 let q1 = data.get_unchecked(k).0.to_f64()?;
807 let q2 = f64::midpoint(
808 data.get_unchecked(2 * k).0.to_f64()?,
809 data.get_unchecked(2 * k + 1).0.to_f64()?,
810 );
811 let q3 = data.get_unchecked(3 * k + 1).0.to_f64()?;
812 (q1, q2, q3)
813 }
814 _ => {
815 let q1 = data.get_unchecked(k).0.to_f64()?;
826 let q2 = data.get_unchecked(2 * k + 1).0.to_f64()?;
827 let q3 = data.get_unchecked(3 * k + 2).0.to_f64()?;
828 (q1, q2, q3)
829 }
830 })
831 }
832}
833
834fn quartiles_with_selection<T>(data: &mut [Partial<T>]) -> Option<(f64, f64, f64)>
837where
838 T: PartialOrd + ToPrimitive,
839{
840 let len = data.len();
841
842 match len {
844 0..=2 => return None,
845 3 => {
846 let min_val = quickselect(data, 0)?.to_f64()?;
848 let med_val = quickselect(data, 1)?.to_f64()?;
849 let max_val = quickselect(data, 2)?.to_f64()?;
850 return Some((min_val, med_val, max_val));
851 }
852 _ => {}
853 }
854
855 let k = len / 4;
857 let remainder = len % 4;
858
859 match remainder {
861 0 => {
862 let q1_low = quickselect(data, k - 1)?.to_f64()?;
867 let q1_high = quickselect(data, k)?.to_f64()?;
868 let q1 = f64::midpoint(q1_low, q1_high);
869
870 let q2_low = quickselect(data, 2 * k - 1)?.to_f64()?;
871 let q2_high = quickselect(data, 2 * k)?.to_f64()?;
872 let q2 = f64::midpoint(q2_low, q2_high);
873
874 let q3_low = quickselect(data, 3 * k - 1)?.to_f64()?;
875 let q3_high = quickselect(data, 3 * k)?.to_f64()?;
876 let q3 = f64::midpoint(q3_low, q3_high);
877
878 Some((q1, q2, q3))
879 }
880 1 => {
881 let q1_low = quickselect(data, k - 1)?.to_f64()?;
886 let q1_high = quickselect(data, k)?.to_f64()?;
887 let q1 = f64::midpoint(q1_low, q1_high);
888
889 let q2 = quickselect(data, 2 * k)?.to_f64()?;
890
891 let q3_low = quickselect(data, 3 * k)?.to_f64()?;
892 let q3_high = quickselect(data, 3 * k + 1)?.to_f64()?;
893 let q3 = f64::midpoint(q3_low, q3_high);
894
895 Some((q1, q2, q3))
896 }
897 2 => {
898 let q1 = quickselect(data, k)?.to_f64()?;
903
904 let q2_low = quickselect(data, 2 * k)?.to_f64()?;
905 let q2_high = quickselect(data, 2 * k + 1)?.to_f64()?;
906 let q2 = f64::midpoint(q2_low, q2_high);
907
908 let q3 = quickselect(data, 3 * k + 1)?.to_f64()?;
909
910 Some((q1, q2, q3))
911 }
912 _ => {
913 let q1 = quickselect(data, k)?.to_f64()?;
918 let q2 = quickselect(data, 2 * k + 1)?.to_f64()?;
919 let q3 = quickselect(data, 3 * k + 2)?.to_f64()?;
920
921 Some((q1, q2, q3))
922 }
923 }
924}
925
926fn quartiles_with_zero_copy_selection<T>(data: &[Partial<T>]) -> Option<(f64, f64, f64)>
929where
930 T: PartialOrd + ToPrimitive,
931{
932 let len = data.len();
933
934 match len {
936 0..=2 => return None,
937 3 => {
938 let mut indices = vec![0, 1, 2];
940 let min_val = quickselect_by_index(data, &mut indices, 0)?.to_f64()?;
941 let med_val = quickselect_by_index(data, &mut indices, 1)?.to_f64()?;
942 let max_val = quickselect_by_index(data, &mut indices, 2)?.to_f64()?;
943 return Some((min_val, med_val, max_val));
944 }
945 _ => {}
946 }
947
948 let k = len / 4;
950 let remainder = len % 4;
951
952 let indices_template: Vec<usize> = (0..len).collect();
954 #[allow(unused)]
955 let mut indices: Vec<usize> = Vec::with_capacity(len);
956
957 match remainder {
961 0 => {
962 indices.clone_from(&indices_template);
964 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
965 indices.clone_from(&indices_template);
966 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
967 let q1 = f64::midpoint(q1_low, q1_high);
968
969 indices.clone_from(&indices_template);
970 let q2_low = quickselect_by_index(data, &mut indices, 2 * k - 1)?.to_f64()?;
971 indices.clone_from(&indices_template);
972 let q2_high = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
973 let q2 = f64::midpoint(q2_low, q2_high);
974
975 indices.clone_from(&indices_template);
976 let q3_low = quickselect_by_index(data, &mut indices, 3 * k - 1)?.to_f64()?;
977 indices.clone_from(&indices_template);
978 let q3_high = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
979 let q3 = f64::midpoint(q3_low, q3_high);
980
981 Some((q1, q2, q3))
982 }
983 1 => {
984 indices.clone_from(&indices_template);
986 let q1_low = quickselect_by_index(data, &mut indices, k - 1)?.to_f64()?;
987 indices.clone_from(&indices_template);
988 let q1_high = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
989 let q1 = f64::midpoint(q1_low, q1_high);
990
991 indices.clone_from(&indices_template);
992 let q2 = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
993
994 indices.clone_from(&indices_template);
995 let q3_low = quickselect_by_index(data, &mut indices, 3 * k)?.to_f64()?;
996 indices.clone_from(&indices_template);
997 let q3_high = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
998 let q3 = f64::midpoint(q3_low, q3_high);
999
1000 Some((q1, q2, q3))
1001 }
1002 2 => {
1003 indices.clone_from(&indices_template);
1005 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1006
1007 indices.clone_from(&indices_template);
1008 let q2_low = quickselect_by_index(data, &mut indices, 2 * k)?.to_f64()?;
1009 indices.clone_from(&indices_template);
1010 let q2_high = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1011 let q2 = f64::midpoint(q2_low, q2_high);
1012
1013 indices.clone_from(&indices_template);
1014 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 1)?.to_f64()?;
1015
1016 Some((q1, q2, q3))
1017 }
1018 _ => {
1019 indices.clone_from(&indices_template);
1021 let q1 = quickselect_by_index(data, &mut indices, k)?.to_f64()?;
1022 indices.clone_from(&indices_template);
1023 let q2 = quickselect_by_index(data, &mut indices, 2 * k + 1)?.to_f64()?;
1024 indices.clone_from(&indices_template);
1025 let q3 = quickselect_by_index(data, &mut indices, 3 * k + 2)?.to_f64()?;
1026
1027 Some((q1, q2, q3))
1028 }
1029 }
1030}
1031
1032fn mode_on_sorted<T, I>(it: I) -> Option<T>
1033where
1034 T: PartialOrd,
1035 I: Iterator<Item = T>,
1036{
1037 use std::cmp::Ordering;
1038
1039 let (mut mode, mut next) = (None, None);
1046 let (mut mode_count, mut next_count) = (0usize, 0usize);
1047 for x in it {
1048 if mode.as_ref() == Some(&x) {
1049 mode_count += 1;
1050 } else if next.as_ref() == Some(&x) {
1051 next_count += 1;
1052 } else {
1053 next = Some(x);
1054 next_count = 0;
1055 }
1056
1057 match next_count.cmp(&mode_count) {
1058 Ordering::Greater => {
1059 mode = next;
1060 mode_count = next_count;
1061 next = None;
1062 next_count = 0;
1063 }
1064 Ordering::Equal => {
1065 mode = None;
1066 mode_count = 0;
1067 }
1068 Ordering::Less => {}
1069 }
1070 }
1071 mode
1072}
1073
1074#[allow(clippy::type_complexity)]
1102#[inline]
1103fn modes_and_antimodes_on_sorted_slice<T>(
1104 data: &[Partial<T>],
1105) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32))
1106where
1107 T: PartialOrd + Clone,
1108{
1109 let size = data.len();
1110
1111 if size == 0 {
1113 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1114 }
1115
1116 let sqrt_size = if size == 0 {
1119 0
1120 } else {
1121 let mut x = size;
1122 let mut y = x.div_ceil(2);
1123 while y < x {
1124 x = y;
1125 y = usize::midpoint(x, size / x);
1126 }
1127 x
1128 };
1129 let mut runs: Vec<(&T, u32)> = Vec::with_capacity(sqrt_size.clamp(16, 1_000));
1130
1131 let mut current_value = &data[0].0;
1132 let mut current_count = 1;
1133 let mut highest_count = 1;
1134 let mut lowest_count = u32::MAX;
1135
1136 for x in data.iter().skip(1) {
1138 if x.0 == *current_value {
1139 current_count += 1;
1140 highest_count = highest_count.max(current_count);
1141 } else {
1142 runs.push((current_value, current_count));
1143 lowest_count = lowest_count.min(current_count);
1144 current_value = &x.0;
1145 current_count = 1;
1146 }
1147 }
1148 runs.push((current_value, current_count));
1149 lowest_count = lowest_count.min(current_count);
1150
1151 if runs.len() == 1 {
1153 let (val, count) = runs.pop().unwrap();
1154 return ((vec![val.clone()], 1, count), (Vec::new(), 0, 0));
1155 }
1156
1157 if highest_count == 1 {
1159 let antimodes_count = runs.len().min(10);
1160 let total_count = runs.len();
1161 let mut antimodes = Vec::with_capacity(antimodes_count);
1162 for (val, _) in runs.into_iter().take(antimodes_count) {
1163 antimodes.push(val.clone());
1164 }
1165 return ((Vec::new(), 0, 0), (antimodes, total_count, 1));
1167 }
1168
1169 let estimated_modes = (runs.len() / 10).clamp(1, 10);
1173 let estimated_antimodes = 10.min(runs.len());
1174
1175 let mut modes_indices = Vec::with_capacity(estimated_modes);
1177 let mut antimodes_indices = Vec::with_capacity(estimated_antimodes);
1178 let mut mode_count = 0;
1179 let mut antimodes_count = 0;
1180 let mut antimodes_collected = 0_u32;
1181
1182 for (idx, (_, count)) in runs.iter().enumerate() {
1184 if *count == highest_count {
1185 modes_indices.push(idx);
1186 mode_count += 1;
1187 }
1188 if *count == lowest_count {
1189 antimodes_count += 1;
1190 if antimodes_collected < 10 {
1191 antimodes_indices.push(idx);
1192 antimodes_collected += 1;
1193 }
1194 }
1195 }
1196
1197 let modes_result: Vec<T> = modes_indices
1199 .into_iter()
1200 .map(|idx| runs[idx].0.clone())
1201 .collect();
1202 let antimodes_result: Vec<T> = antimodes_indices
1203 .into_iter()
1204 .map(|idx| runs[idx].0.clone())
1205 .collect();
1206
1207 (
1208 (modes_result, mode_count, highest_count),
1209 (antimodes_result, antimodes_count, lowest_count),
1210 )
1211}
1212
1213#[allow(clippy::unsafe_derive_deserialize)]
1221#[derive(Clone, Serialize, Deserialize, Eq, PartialEq)]
1222pub struct Unsorted<T> {
1223 sorted: bool,
1224 data: Vec<Partial<T>>,
1225}
1226
1227impl<T: PartialOrd> Unsorted<T> {
1228 #[inline]
1230 #[must_use]
1231 pub fn new() -> Unsorted<T> {
1232 Default::default()
1233 }
1234
1235 #[allow(clippy::inline_always)]
1237 #[inline(always)]
1238 pub fn add(&mut self, v: T) {
1239 self.sorted = false;
1240 self.data.push(Partial(v));
1241 }
1242
1243 #[inline]
1245 #[must_use]
1246 pub const fn len(&self) -> usize {
1247 self.data.len()
1248 }
1249
1250 #[inline]
1251 #[must_use]
1252 pub const fn is_empty(&self) -> bool {
1253 self.data.is_empty()
1254 }
1255
1256 #[inline]
1257 fn sort(&mut self) {
1258 if !self.sorted {
1259 if self.data.len() < PARALLEL_THRESHOLD {
1261 self.data.sort_unstable();
1262 } else {
1263 self.data.par_sort_unstable();
1264 }
1265 self.sorted = true;
1266 }
1267 }
1268
1269 #[inline]
1270 const fn already_sorted(&mut self) {
1271 self.sorted = true;
1272 }
1273
1274 #[inline]
1276 pub fn add_bulk(&mut self, values: Vec<T>) {
1277 self.sorted = false;
1278 self.data.reserve(values.len());
1279 self.data.extend(values.into_iter().map(Partial));
1280 }
1281
1282 #[inline]
1284 pub fn shrink_to_fit(&mut self) {
1285 self.data.shrink_to_fit();
1286 }
1287
1288 #[inline]
1290 #[must_use]
1291 pub fn with_capacity(capacity: usize) -> Self {
1292 Unsorted {
1293 sorted: true,
1294 data: Vec::with_capacity(capacity),
1295 }
1296 }
1297
1298 #[inline]
1300 pub fn push_ascending(&mut self, value: T) {
1301 if let Some(last) = self.data.last() {
1302 debug_assert!(last.0 <= value, "Value must be >= than last element");
1303 }
1304 self.data.push(Partial(value));
1305 }
1307}
1308
1309impl<T: PartialOrd + PartialEq + Clone> Unsorted<T> {
1310 #[inline]
1311 pub fn cardinality(&mut self, sorted: bool, parallel_threshold: usize) -> u64 {
1319 const CHUNK_SIZE: usize = 2048; const DEFAULT_PARALLEL_THRESHOLD: usize = 10_240; let len = self.data.len();
1323 match len {
1324 0 => return 0,
1325 1 => return 1,
1326 _ => {}
1327 }
1328
1329 if sorted {
1330 self.already_sorted();
1331 } else {
1332 self.sort();
1333 }
1334
1335 let use_parallel = parallel_threshold != 0
1336 && (parallel_threshold == 1
1337 || len > parallel_threshold.max(DEFAULT_PARALLEL_THRESHOLD));
1338
1339 if use_parallel {
1340 type ChunkInfo<'a, T> = Vec<(u64, Option<&'a Partial<T>>, Option<&'a Partial<T>>)>;
1343 let chunk_info: ChunkInfo<'_, T> = self
1344 .data
1345 .par_chunks(CHUNK_SIZE)
1346 .map(|chunk| {
1347 let mut count = u64::from(!chunk.is_empty());
1349 for window in chunk.windows(2) {
1350 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1352 count += 1;
1353 }
1354 }
1355 (count, chunk.first(), chunk.last())
1356 })
1357 .collect();
1358
1359 let mut total = 0;
1361 for (i, &(count, first_opt, _last_opt)) in chunk_info.iter().enumerate() {
1362 total += count;
1363
1364 if i > 0
1366 && let (Some(prev_last), Some(curr_first)) = (chunk_info[i - 1].2, first_opt)
1367 && prev_last == curr_first
1368 {
1369 total -= 1; }
1371 }
1372
1373 total
1374 } else {
1375 let mut count = u64::from(!self.data.is_empty());
1380
1381 for window in self.data.windows(2) {
1382 if unsafe { window.get_unchecked(0) != window.get_unchecked(1) } {
1384 count += 1;
1385 }
1386 }
1387 count
1388 }
1389 }
1390}
1391
1392impl<T: PartialOrd + Clone> Unsorted<T> {
1393 #[inline]
1395 pub fn mode(&mut self) -> Option<T> {
1396 if self.data.is_empty() {
1397 return None;
1398 }
1399 self.sort();
1400 mode_on_sorted(self.data.iter().map(|p| &p.0)).cloned()
1401 }
1402
1403 #[inline]
1407 fn modes(&mut self) -> (Vec<T>, usize, u32) {
1408 if self.data.is_empty() {
1409 return (Vec::new(), 0, 0);
1410 }
1411 self.sort();
1412 modes_and_antimodes_on_sorted_slice(&self.data).0
1413 }
1414
1415 #[inline]
1418 fn antimodes(&mut self) -> (Vec<T>, usize, u32) {
1419 if self.data.is_empty() {
1420 return (Vec::new(), 0, 0);
1421 }
1422 self.sort();
1423 modes_and_antimodes_on_sorted_slice(&self.data).1
1424 }
1425
1426 #[allow(clippy::type_complexity)]
1429 #[inline]
1430 pub fn modes_antimodes(&mut self) -> ((Vec<T>, usize, u32), (Vec<T>, usize, u32)) {
1431 if self.data.is_empty() {
1432 return ((Vec::new(), 0, 0), (Vec::new(), 0, 0));
1433 }
1434 self.sort();
1435 modes_and_antimodes_on_sorted_slice(&self.data)
1436 }
1437}
1438
1439impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1440 #[inline]
1442 pub fn median(&mut self) -> Option<f64> {
1443 if self.data.is_empty() {
1444 return None;
1445 }
1446 self.sort();
1447 median_on_sorted(&self.data)
1448 }
1449}
1450
1451impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1452 #[inline]
1454 pub fn mad(&mut self, existing_median: Option<f64>) -> Option<f64> {
1455 if self.data.is_empty() {
1456 return None;
1457 }
1458 if existing_median.is_none() {
1459 self.sort();
1460 }
1461 mad_on_sorted(&self.data, existing_median)
1462 }
1463}
1464
1465impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1466 #[inline]
1471 pub fn quartiles(&mut self) -> Option<(f64, f64, f64)> {
1472 if self.data.is_empty() {
1473 return None;
1474 }
1475 self.sort();
1476 quartiles_on_sorted(&self.data)
1477 }
1478}
1479
1480impl<T: PartialOrd + ToPrimitive + Sync> Unsorted<T> {
1481 #[inline]
1487 pub fn gini(&mut self) -> Option<f64> {
1488 if self.data.is_empty() {
1489 return None;
1490 }
1491 self.sort();
1492 gini_on_sorted(&self.data)
1493 }
1494
1495 #[inline]
1502 pub fn kurtosis(&mut self) -> Option<f64> {
1503 if self.data.is_empty() {
1504 return None;
1505 }
1506 self.sort();
1507 kurtosis_on_sorted(&self.data)
1508 }
1509
1510 #[inline]
1517 pub fn percentile_rank<V>(&mut self, value: V) -> Option<f64>
1518 where
1519 V: PartialOrd + ToPrimitive,
1520 {
1521 if self.data.is_empty() {
1522 return None;
1523 }
1524 self.sort();
1525 percentile_rank_on_sorted(&self.data, &value)
1526 }
1527
1528 #[inline]
1543 pub fn atkinson(&mut self, epsilon: f64) -> Option<f64> {
1544 if self.data.is_empty() {
1545 return None;
1546 }
1547 self.sort();
1548 atkinson_on_sorted(&self.data, epsilon)
1549 }
1550}
1551
1552impl<T: PartialOrd + ToPrimitive + Clone> Unsorted<T> {
1553 #[inline]
1565 pub fn quartiles_with_selection(&mut self) -> Option<(f64, f64, f64)> {
1566 if self.data.is_empty() {
1567 return None;
1568 }
1569 if self.sorted {
1571 return quartiles_with_zero_copy_selection(&self.data);
1572 }
1573 let mut data_copy: Vec<Partial<T>> =
1575 self.data.iter().map(|x| Partial(x.0.clone())).collect();
1576 quartiles_with_selection(&mut data_copy)
1577 }
1578}
1579
1580impl<T: PartialOrd + ToPrimitive> Unsorted<T> {
1581 #[inline]
1587 #[must_use]
1588 pub fn quartiles_zero_copy(&self) -> Option<(f64, f64, f64)> {
1589 if self.data.is_empty() {
1590 return None;
1591 }
1592 quartiles_with_zero_copy_selection(&self.data)
1593 }
1594}
1595
1596impl<T: PartialOrd> Commute for Unsorted<T> {
1597 #[inline]
1598 fn merge(&mut self, mut v: Unsorted<T>) {
1599 if v.is_empty() {
1600 return;
1601 }
1602
1603 self.sorted = false;
1604 self.data.extend(std::mem::take(&mut v.data));
1606 }
1607}
1608
1609impl<T: PartialOrd> Default for Unsorted<T> {
1610 #[inline]
1611 fn default() -> Unsorted<T> {
1612 Unsorted {
1613 data: Vec::with_capacity(10_000),
1614 sorted: true, }
1616 }
1617}
1618
1619impl<T: PartialOrd> FromIterator<T> for Unsorted<T> {
1620 #[inline]
1621 fn from_iter<I: IntoIterator<Item = T>>(it: I) -> Unsorted<T> {
1622 let mut v = Unsorted::new();
1623 v.extend(it);
1624 v
1625 }
1626}
1627
1628impl<T: PartialOrd> Extend<T> for Unsorted<T> {
1629 #[inline]
1630 fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
1631 self.sorted = false;
1632 self.data.extend(it.into_iter().map(Partial));
1633 }
1634}
1635
1636fn custom_percentiles_on_sorted<T>(data: &[Partial<T>], percentiles: &[u8]) -> Option<Vec<T>>
1637where
1638 T: PartialOrd + Clone,
1639{
1640 let len = data.len();
1641
1642 if len == 0 || percentiles.iter().any(|&p| p > 100) {
1644 return None;
1645 }
1646
1647 let unique_percentiles: Vec<u8> = if percentiles.len() <= 1 {
1649 percentiles.to_vec()
1651 } else {
1652 let is_sorted_unique = percentiles.windows(2).all(|w| w[0] < w[1]);
1654
1655 if is_sorted_unique {
1656 percentiles.to_vec()
1658 } else {
1659 use std::collections::HashSet;
1661 let mut seen = HashSet::with_capacity(percentiles.len().min(100));
1662 let mut sorted_unique = Vec::with_capacity(percentiles.len());
1663 for &p in percentiles {
1664 if seen.insert(p) {
1665 sorted_unique.push(p);
1666 }
1667 }
1668 sorted_unique.sort_unstable();
1669 sorted_unique
1670 }
1671 };
1672
1673 let mut results = Vec::with_capacity(unique_percentiles.len());
1674
1675 unsafe {
1679 for &p in &unique_percentiles {
1680 #[allow(clippy::cast_sign_loss)]
1684 let rank = ((f64::from(p) / 100.0) * len as f64).ceil() as usize;
1685
1686 let idx = rank.saturating_sub(1);
1688
1689 results.push(data.get_unchecked(idx).0.clone());
1691 }
1692 }
1693
1694 Some(results)
1695}
1696
1697impl<T: PartialOrd + Clone> Unsorted<T> {
1698 #[inline]
1720 pub fn custom_percentiles(&mut self, percentiles: &[u8]) -> Option<Vec<T>> {
1721 if self.data.is_empty() {
1722 return None;
1723 }
1724 self.sort();
1725 custom_percentiles_on_sorted(&self.data, percentiles)
1726 }
1727}
1728
1729#[cfg(test)]
1730mod test {
1731 use super::*;
1732
1733 #[test]
1734 fn test_cardinality_empty() {
1735 let mut unsorted: Unsorted<i32> = Unsorted::new();
1736 assert_eq!(unsorted.cardinality(false, 1), 0);
1737 }
1738
1739 #[test]
1740 fn test_cardinality_single_element() {
1741 let mut unsorted = Unsorted::new();
1742 unsorted.add(5);
1743 assert_eq!(unsorted.cardinality(false, 1), 1);
1744 }
1745
1746 #[test]
1747 fn test_cardinality_unique_elements() {
1748 let mut unsorted = Unsorted::new();
1749 unsorted.extend(vec![1, 2, 3, 4, 5]);
1750 assert_eq!(unsorted.cardinality(false, 1), 5);
1751 }
1752
1753 #[test]
1754 fn test_cardinality_duplicate_elements() {
1755 let mut unsorted = Unsorted::new();
1756 unsorted.extend(vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
1757 assert_eq!(unsorted.cardinality(false, 1), 4);
1758 }
1759
1760 #[test]
1761 fn test_cardinality_all_same() {
1762 let mut unsorted = Unsorted::new();
1763 unsorted.extend(vec![1; 100]);
1764 assert_eq!(unsorted.cardinality(false, 1), 1);
1765 }
1766
1767 #[test]
1768 fn test_cardinality_large_range() {
1769 let mut unsorted = Unsorted::new();
1770 unsorted.extend(0..1_000_000);
1771 assert_eq!(unsorted.cardinality(false, 1), 1_000_000);
1772 }
1773
1774 #[test]
1775 fn test_cardinality_large_range_sequential() {
1776 let mut unsorted = Unsorted::new();
1777 unsorted.extend(0..1_000_000);
1778 assert_eq!(unsorted.cardinality(false, 2_000_000), 1_000_000);
1779 }
1780
1781 #[test]
1782 fn test_cardinality_presorted() {
1783 let mut unsorted = Unsorted::new();
1784 unsorted.extend(vec![1, 2, 3, 4, 5]);
1785 unsorted.sort();
1786 assert_eq!(unsorted.cardinality(true, 1), 5);
1787 }
1788
1789 #[test]
1790 fn test_cardinality_float() {
1791 let mut unsorted = Unsorted::new();
1792 unsorted.extend(vec![1.0, 1.0, 2.0, 3.0, 3.0, 4.0]);
1793 assert_eq!(unsorted.cardinality(false, 1), 4);
1794 }
1795
1796 #[test]
1797 fn test_cardinality_string() {
1798 let mut unsorted = Unsorted::new();
1799 unsorted.extend(vec!["a", "b", "b", "c", "c", "c"]);
1800 assert_eq!(unsorted.cardinality(false, 1), 3);
1801 }
1802
1803 #[test]
1804 fn test_quartiles_selection_vs_sorted() {
1805 let test_cases = vec![
1807 vec![3, 5, 7, 9],
1808 vec![3, 5, 7],
1809 vec![1, 2, 7, 11],
1810 vec![3, 5, 7, 9, 12],
1811 vec![2, 2, 3, 8, 10],
1812 vec![3, 5, 7, 9, 12, 20],
1813 vec![0, 2, 4, 8, 10, 11],
1814 vec![3, 5, 7, 9, 12, 20, 21],
1815 vec![1, 5, 6, 6, 7, 10, 19],
1816 ];
1817
1818 for test_case in test_cases {
1819 let mut unsorted1 = Unsorted::new();
1820 let mut unsorted2 = Unsorted::new();
1821 let mut unsorted3 = Unsorted::new();
1822 unsorted1.extend(test_case.clone());
1823 unsorted2.extend(test_case.clone());
1824 unsorted3.extend(test_case.clone());
1825
1826 let result_sorted = unsorted1.quartiles();
1827 let result_selection = unsorted2.quartiles_with_selection();
1828 let result_zero_copy = unsorted3.quartiles_zero_copy();
1829
1830 assert_eq!(
1831 result_sorted, result_selection,
1832 "Selection mismatch for test case: {:?}",
1833 test_case
1834 );
1835 assert_eq!(
1836 result_sorted, result_zero_copy,
1837 "Zero-copy mismatch for test case: {:?}",
1838 test_case
1839 );
1840 }
1841 }
1842
1843 #[test]
1844 fn test_quartiles_with_selection_small() {
1845 let mut unsorted: Unsorted<i32> = Unsorted::new();
1847 assert_eq!(unsorted.quartiles_with_selection(), None);
1848
1849 let mut unsorted = Unsorted::new();
1850 unsorted.extend(vec![1, 2]);
1851 assert_eq!(unsorted.quartiles_with_selection(), None);
1852
1853 let mut unsorted = Unsorted::new();
1854 unsorted.extend(vec![1, 2, 3]);
1855 assert_eq!(unsorted.quartiles_with_selection(), Some((1.0, 2.0, 3.0)));
1856 }
1857
1858 #[test]
1859 fn test_quickselect() {
1860 let data = vec![
1861 Partial(3),
1862 Partial(1),
1863 Partial(4),
1864 Partial(1),
1865 Partial(5),
1866 Partial(9),
1867 Partial(2),
1868 Partial(6),
1869 ];
1870
1871 assert_eq!(quickselect(&mut data.clone(), 0), Some(&1));
1873 assert_eq!(quickselect(&mut data.clone(), 3), Some(&3));
1874 assert_eq!(quickselect(&mut data.clone(), 7), Some(&9));
1875
1876 let mut empty: Vec<Partial<i32>> = vec![];
1878 assert_eq!(quickselect(&mut empty, 0), None);
1879
1880 let mut data = vec![Partial(3), Partial(1), Partial(4), Partial(1), Partial(5)];
1881 assert_eq!(quickselect(&mut data, 10), None); }
1883
1884 #[test]
1885 fn median_stream() {
1886 assert_eq!(median(vec![3usize, 5, 7, 9].into_iter()), Some(6.0));
1887 assert_eq!(median(vec![3usize, 5, 7].into_iter()), Some(5.0));
1888 }
1889
1890 #[test]
1891 fn mad_stream() {
1892 assert_eq!(mad(vec![3usize, 5, 7, 9].into_iter(), None), Some(2.0));
1893 assert_eq!(
1894 mad(
1895 vec![
1896 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38,
1897 50, 61, 39, 88, 5, 13, 64
1898 ]
1899 .into_iter(),
1900 None
1901 ),
1902 Some(16.0)
1903 );
1904 }
1905
1906 #[test]
1907 fn mad_stream_precalc_median() {
1908 let data = vec![3usize, 5, 7, 9].into_iter();
1909 let median1 = median(data.clone());
1910 assert_eq!(mad(data, median1), Some(2.0));
1911
1912 let data2 = vec![
1913 86usize, 60, 95, 39, 49, 12, 56, 82, 92, 24, 33, 28, 46, 34, 100, 39, 100, 38, 50, 61,
1914 39, 88, 5, 13, 64,
1915 ]
1916 .into_iter();
1917 let median2 = median(data2.clone());
1918 assert_eq!(mad(data2, median2), Some(16.0));
1919 }
1920
1921 #[test]
1922 fn mode_stream() {
1923 assert_eq!(mode(vec![3usize, 5, 7, 9].into_iter()), None);
1924 assert_eq!(mode(vec![3usize, 3, 3, 3].into_iter()), Some(3));
1925 assert_eq!(mode(vec![3usize, 3, 3, 4].into_iter()), Some(3));
1926 assert_eq!(mode(vec![4usize, 3, 3, 3].into_iter()), Some(3));
1927 assert_eq!(mode(vec![1usize, 1, 2, 3, 3].into_iter()), None);
1928 }
1929
1930 #[test]
1931 fn median_floats() {
1932 assert_eq!(median(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), Some(6.0));
1933 assert_eq!(median(vec![3.0f64, 5.0, 7.0].into_iter()), Some(5.0));
1934 }
1935
1936 #[test]
1937 fn mode_floats() {
1938 assert_eq!(mode(vec![3.0f64, 5.0, 7.0, 9.0].into_iter()), None);
1939 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1940 assert_eq!(mode(vec![3.0f64, 3.0, 3.0, 4.0].into_iter()), Some(3.0));
1941 assert_eq!(mode(vec![4.0f64, 3.0, 3.0, 3.0].into_iter()), Some(3.0));
1942 assert_eq!(mode(vec![1.0f64, 1.0, 2.0, 3.0, 3.0].into_iter()), None);
1943 }
1944
1945 #[test]
1946 fn modes_stream() {
1947 assert_eq!(modes(vec![3usize, 5, 7, 9].into_iter()), (vec![], 0, 0));
1948 assert_eq!(modes(vec![3usize, 3, 3, 3].into_iter()), (vec![3], 1, 4));
1949 assert_eq!(modes(vec![3usize, 3, 4, 4].into_iter()), (vec![3, 4], 2, 2));
1950 assert_eq!(modes(vec![4usize, 3, 3, 3].into_iter()), (vec![3], 1, 3));
1951 assert_eq!(modes(vec![1usize, 1, 2, 2].into_iter()), (vec![1, 2], 2, 2));
1952 let vec: Vec<u32> = vec![];
1953 assert_eq!(modes(vec.into_iter()), (vec![], 0, 0));
1954 }
1955
1956 #[test]
1957 fn modes_floats() {
1958 assert_eq!(
1959 modes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
1960 (vec![], 0, 0)
1961 );
1962 assert_eq!(
1963 modes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
1964 (vec![3.0], 1, 4)
1965 );
1966 assert_eq!(
1967 modes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
1968 (vec![3.0, 4.0], 2, 2)
1969 );
1970 assert_eq!(
1971 modes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
1972 (vec![1.0, 3.0], 2, 2)
1973 );
1974 }
1975
1976 #[test]
1977 fn antimodes_stream() {
1978 assert_eq!(
1979 antimodes(vec![3usize, 5, 7, 9].into_iter()),
1980 (vec![3, 5, 7, 9], 4, 1)
1981 );
1982 assert_eq!(
1983 antimodes(vec![1usize, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13].into_iter()),
1984 (vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 13, 1)
1985 );
1986 assert_eq!(
1987 antimodes(vec![1usize, 3, 3, 3].into_iter()),
1988 (vec![1], 1, 1)
1989 );
1990 assert_eq!(
1991 antimodes(vec![3usize, 3, 4, 4].into_iter()),
1992 (vec![3, 4], 2, 2)
1993 );
1994 assert_eq!(
1995 antimodes(
1996 vec![
1997 3usize, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1998 14, 14, 15, 15
1999 ]
2000 .into_iter()
2001 ),
2002 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2004 );
2005 assert_eq!(
2006 antimodes(
2007 vec![
2008 3usize, 3, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 4, 4, 5, 5, 6, 6, 7, 7, 13, 13,
2009 14, 14, 15, 15
2010 ]
2011 .into_iter()
2012 ),
2013 (vec![3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 13, 2)
2014 );
2015 assert_eq!(
2016 antimodes(vec![3usize, 3, 3, 4].into_iter()),
2017 (vec![4], 1, 1)
2018 );
2019 assert_eq!(
2020 antimodes(vec![4usize, 3, 3, 3].into_iter()),
2021 (vec![4], 1, 1)
2022 );
2023 assert_eq!(
2024 antimodes(vec![1usize, 1, 2, 2].into_iter()),
2025 (vec![1, 2], 2, 2)
2026 );
2027 let vec: Vec<u32> = vec![];
2028 assert_eq!(antimodes(vec.into_iter()), (vec![], 0, 0));
2029 }
2030
2031 #[test]
2032 fn antimodes_floats() {
2033 assert_eq!(
2034 antimodes(vec![3_f64, 5.0, 7.0, 9.0].into_iter()),
2035 (vec![3.0, 5.0, 7.0, 9.0], 4, 1)
2036 );
2037 assert_eq!(
2038 antimodes(vec![3_f64, 3.0, 3.0, 3.0].into_iter()),
2039 (vec![], 0, 0)
2040 );
2041 assert_eq!(
2042 antimodes(vec![3_f64, 3.0, 4.0, 4.0].into_iter()),
2043 (vec![3.0, 4.0], 2, 2)
2044 );
2045 assert_eq!(
2046 antimodes(vec![1_f64, 1.0, 2.0, 3.0, 3.0].into_iter()),
2047 (vec![2.0], 1, 1)
2048 );
2049 }
2050
2051 #[test]
2052 fn test_custom_percentiles() {
2053 let mut unsorted: Unsorted<i32> = Unsorted::new();
2055 unsorted.extend(1..=11); let result = unsorted.custom_percentiles(&[25, 50, 75]).unwrap();
2058 assert_eq!(result, vec![3, 6, 9]);
2059
2060 let mut str_data = Unsorted::new();
2062 str_data.extend(vec!["a", "b", "c", "d", "e"]);
2063 let result = str_data.custom_percentiles(&[20, 40, 60, 80]).unwrap();
2064 assert_eq!(result, vec!["a", "b", "c", "d"]);
2065
2066 let mut char_data = Unsorted::new();
2068 char_data.extend('a'..='e');
2069 let result = char_data.custom_percentiles(&[25, 50, 75]).unwrap();
2070 assert_eq!(result, vec!['b', 'c', 'd']);
2071
2072 let mut float_data = Unsorted::new();
2074 float_data.extend(vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]);
2075 let result = float_data
2076 .custom_percentiles(&[10, 30, 50, 70, 90])
2077 .unwrap();
2078 assert_eq!(result, vec![1.1, 3.3, 5.5, 7.7, 9.9]);
2079
2080 let result = float_data.custom_percentiles(&[]).unwrap();
2082 assert_eq!(result, Vec::<f64>::new());
2083
2084 let result = float_data.custom_percentiles(&[50, 50, 50]).unwrap();
2086 assert_eq!(result, vec![5.5]);
2087
2088 let result = float_data.custom_percentiles(&[0, 100]).unwrap();
2090 assert_eq!(result, vec![1.1, 9.9]);
2091
2092 let result = float_data.custom_percentiles(&[75, 25, 50]).unwrap();
2094 assert_eq!(result, vec![3.3, 5.5, 7.7]); let mut single = Unsorted::new();
2098 single.add(42);
2099 let result = single.custom_percentiles(&[0, 50, 100]).unwrap();
2100 assert_eq!(result, vec![42, 42, 42]);
2101 }
2102
2103 #[test]
2104 fn quartiles_stream() {
2105 assert_eq!(
2106 quartiles(vec![3usize, 5, 7].into_iter()),
2107 Some((3., 5., 7.))
2108 );
2109 assert_eq!(
2110 quartiles(vec![3usize, 5, 7, 9].into_iter()),
2111 Some((4., 6., 8.))
2112 );
2113 assert_eq!(
2114 quartiles(vec![1usize, 2, 7, 11].into_iter()),
2115 Some((1.5, 4.5, 9.))
2116 );
2117 assert_eq!(
2118 quartiles(vec![3usize, 5, 7, 9, 12].into_iter()),
2119 Some((4., 7., 10.5))
2120 );
2121 assert_eq!(
2122 quartiles(vec![2usize, 2, 3, 8, 10].into_iter()),
2123 Some((2., 3., 9.))
2124 );
2125 assert_eq!(
2126 quartiles(vec![3usize, 5, 7, 9, 12, 20].into_iter()),
2127 Some((5., 8., 12.))
2128 );
2129 assert_eq!(
2130 quartiles(vec![0usize, 2, 4, 8, 10, 11].into_iter()),
2131 Some((2., 6., 10.))
2132 );
2133 assert_eq!(
2134 quartiles(vec![3usize, 5, 7, 9, 12, 20, 21].into_iter()),
2135 Some((5., 9., 20.))
2136 );
2137 assert_eq!(
2138 quartiles(vec![1usize, 5, 6, 6, 7, 10, 19].into_iter()),
2139 Some((5., 6., 10.))
2140 );
2141 }
2142
2143 #[test]
2144 fn quartiles_floats() {
2145 assert_eq!(
2146 quartiles(vec![3_f64, 5., 7.].into_iter()),
2147 Some((3., 5., 7.))
2148 );
2149 assert_eq!(
2150 quartiles(vec![3_f64, 5., 7., 9.].into_iter()),
2151 Some((4., 6., 8.))
2152 );
2153 assert_eq!(
2154 quartiles(vec![3_f64, 5., 7., 9., 12.].into_iter()),
2155 Some((4., 7., 10.5))
2156 );
2157 assert_eq!(
2158 quartiles(vec![3_f64, 5., 7., 9., 12., 20.].into_iter()),
2159 Some((5., 8., 12.))
2160 );
2161 assert_eq!(
2162 quartiles(vec![3_f64, 5., 7., 9., 12., 20., 21.].into_iter()),
2163 Some((5., 9., 20.))
2164 );
2165 }
2166
2167 #[test]
2168 fn test_quartiles_zero_copy_small() {
2169 let unsorted: Unsorted<i32> = Unsorted::new();
2171 assert_eq!(unsorted.quartiles_zero_copy(), None);
2172
2173 let mut unsorted = Unsorted::new();
2174 unsorted.extend(vec![1, 2]);
2175 assert_eq!(unsorted.quartiles_zero_copy(), None);
2176
2177 let mut unsorted = Unsorted::new();
2178 unsorted.extend(vec![1, 2, 3]);
2179 assert_eq!(unsorted.quartiles_zero_copy(), Some((1.0, 2.0, 3.0)));
2180
2181 let mut unsorted = Unsorted::new();
2183 unsorted.extend(vec![3, 5, 7, 9]);
2184 assert_eq!(unsorted.quartiles_zero_copy(), Some((4.0, 6.0, 8.0)));
2185 }
2186
2187 #[test]
2188 fn gini_empty() {
2189 let mut unsorted: Unsorted<i32> = Unsorted::new();
2190 assert_eq!(unsorted.gini(), None);
2191 let empty_vec: Vec<i32> = vec![];
2192 assert_eq!(gini(empty_vec.into_iter()), None);
2193 }
2194
2195 #[test]
2196 fn gini_single_element() {
2197 let mut unsorted = Unsorted::new();
2198 unsorted.add(5);
2199 assert_eq!(unsorted.gini(), Some(0.0));
2200 assert_eq!(gini(vec![5].into_iter()), Some(0.0));
2201 }
2202
2203 #[test]
2204 fn gini_perfect_equality() {
2205 let mut unsorted = Unsorted::new();
2207 unsorted.extend(vec![10, 10, 10, 10, 10]);
2208 let result = unsorted.gini().unwrap();
2209 assert!((result - 0.0).abs() < 1e-10, "Expected 0.0, got {}", result);
2210
2211 assert!((gini(vec![10, 10, 10, 10, 10].into_iter()).unwrap() - 0.0).abs() < 1e-10);
2212 }
2213
2214 #[test]
2215 fn gini_perfect_inequality() {
2216 let mut unsorted = Unsorted::new();
2219 unsorted.extend(vec![0, 0, 0, 0, 100]);
2220 let result = unsorted.gini().unwrap();
2221 assert!((result - 0.8).abs() < 1e-10, "Expected 0.8, got {}", result);
2224 }
2225
2226 #[test]
2227 fn gini_stream() {
2228 let result = gini(vec![1usize, 2, 3, 4, 5].into_iter()).unwrap();
2235 let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2236 assert!(
2237 (result - expected).abs() < 1e-10,
2238 "Expected {}, got {}",
2239 expected,
2240 result
2241 );
2242 }
2243
2244 #[test]
2245 fn gini_floats() {
2246 let mut unsorted = Unsorted::new();
2247 unsorted.extend(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
2248 let result = unsorted.gini().unwrap();
2249 let expected = (2.0 * 55.0) / (5.0 * 15.0) - 6.0 / 5.0;
2250 assert!((result - expected).abs() < 1e-10);
2251
2252 assert!(
2253 (gini(vec![1.0f64, 2.0, 3.0, 4.0, 5.0].into_iter()).unwrap() - expected).abs() < 1e-10
2254 );
2255 }
2256
2257 #[test]
2258 fn gini_all_zeros() {
2259 let mut unsorted = Unsorted::new();
2261 unsorted.extend(vec![0, 0, 0, 0]);
2262 assert_eq!(unsorted.gini(), None);
2263 assert_eq!(gini(vec![0, 0, 0, 0].into_iter()), None);
2264 }
2265
2266 #[test]
2267 fn gini_negative_values() {
2268 let mut unsorted = Unsorted::new();
2270 unsorted.extend(vec![-5, -3, -1, 1, 3, 5]);
2271 let result = unsorted.gini();
2272 assert_eq!(result, None);
2274
2275 let mut unsorted = Unsorted::new();
2277 unsorted.extend(vec![-2, -1, 0, 1, 2]);
2278 let result = unsorted.gini();
2279 assert_eq!(result, None);
2281
2282 let mut unsorted = Unsorted::new();
2284 unsorted.extend(vec![-1, 0, 1, 2, 3]);
2285 let result = unsorted.gini();
2286 assert!(result.is_some());
2287 }
2288
2289 #[test]
2290 fn gini_known_cases() {
2291 let mut unsorted = Unsorted::new();
2293 unsorted.extend(vec![1, 1, 1, 1, 1]);
2294 let result = unsorted.gini().unwrap();
2295 assert!((result - 0.0).abs() < 1e-10);
2296
2297 let mut unsorted = Unsorted::new();
2299 unsorted.extend(vec![0, 0, 0, 0, 1]);
2300 let result = unsorted.gini().unwrap();
2301 assert!((result - 0.8).abs() < 1e-10);
2303
2304 let mut unsorted = Unsorted::new();
2306 unsorted.extend(vec![1, 2, 3]);
2307 let result = unsorted.gini().unwrap();
2308 let expected = (2.0 * 14.0) / (3.0 * 6.0) - 4.0 / 3.0;
2311 assert!((result - expected).abs() < 1e-10);
2312 }
2313
2314 #[test]
2315 fn kurtosis_empty() {
2316 let mut unsorted: Unsorted<i32> = Unsorted::new();
2317 assert_eq!(unsorted.kurtosis(), None);
2318 let empty_vec: Vec<i32> = vec![];
2319 assert_eq!(kurtosis(empty_vec.into_iter()), None);
2320 }
2321
2322 #[test]
2323 fn kurtosis_small() {
2324 let mut unsorted = Unsorted::new();
2326 unsorted.extend(vec![1, 2]);
2327 assert_eq!(unsorted.kurtosis(), None);
2328
2329 let mut unsorted = Unsorted::new();
2330 unsorted.extend(vec![1, 2, 3]);
2331 assert_eq!(unsorted.kurtosis(), None);
2332 }
2333
2334 #[test]
2335 fn kurtosis_normal_distribution() {
2336 let mut unsorted = Unsorted::new();
2338 unsorted.extend(vec![1, 2, 3, 4, 5]);
2339 let result = unsorted.kurtosis();
2340 assert!(result.is_some());
2341 }
2343
2344 #[test]
2345 fn kurtosis_all_same() {
2346 let mut unsorted = Unsorted::new();
2348 unsorted.extend(vec![5, 5, 5, 5]);
2349 assert_eq!(unsorted.kurtosis(), None);
2350 }
2351
2352 #[test]
2353 fn kurtosis_stream() {
2354 let result = kurtosis(vec![1usize, 2, 3, 4, 5].into_iter());
2355 assert!(result.is_some());
2356 }
2357
2358 #[test]
2359 fn percentile_rank_empty() {
2360 let mut unsorted: Unsorted<i32> = Unsorted::new();
2361 assert_eq!(unsorted.percentile_rank(5), None);
2362 let empty_vec: Vec<i32> = vec![];
2363 assert_eq!(percentile_rank(empty_vec.into_iter(), 5), None);
2364 }
2365
2366 #[test]
2367 fn percentile_rank_basic() {
2368 let mut unsorted = Unsorted::new();
2369 unsorted.extend(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
2370
2371 assert_eq!(unsorted.percentile_rank(0), Some(0.0));
2373
2374 assert_eq!(unsorted.percentile_rank(11), Some(100.0));
2376
2377 let rank = unsorted.percentile_rank(5).unwrap();
2379 assert!((rank - 50.0).abs() < 1.0);
2380
2381 let rank = unsorted.percentile_rank(1).unwrap();
2383 assert!((rank - 10.0).abs() < 1.0);
2384 }
2385
2386 #[test]
2387 fn percentile_rank_duplicates() {
2388 let mut unsorted = Unsorted::new();
2389 unsorted.extend(vec![1, 1, 2, 2, 3, 3, 4, 4, 5, 5]);
2390
2391 let rank = unsorted.percentile_rank(2).unwrap();
2393 assert!((rank - 40.0).abs() < 1.0);
2394 }
2395
2396 #[test]
2397 fn percentile_rank_stream() {
2398 let result = percentile_rank(vec![1usize, 2, 3, 4, 5].into_iter(), 3);
2399 assert_eq!(result, Some(60.0)); }
2401
2402 #[test]
2403 fn atkinson_empty() {
2404 let mut unsorted: Unsorted<i32> = Unsorted::new();
2405 assert_eq!(unsorted.atkinson(1.0), None);
2406 let empty_vec: Vec<i32> = vec![];
2407 assert_eq!(atkinson(empty_vec.into_iter(), 1.0), None);
2408 }
2409
2410 #[test]
2411 fn atkinson_single_element() {
2412 let mut unsorted = Unsorted::new();
2413 unsorted.add(5);
2414 assert_eq!(unsorted.atkinson(1.0), Some(0.0));
2415 assert_eq!(atkinson(vec![5].into_iter(), 1.0), Some(0.0));
2416 }
2417
2418 #[test]
2419 fn atkinson_perfect_equality() {
2420 let mut unsorted = Unsorted::new();
2422 unsorted.extend(vec![10, 10, 10, 10, 10]);
2423 let result = unsorted.atkinson(1.0).unwrap();
2424 assert!((result - 0.0).abs() < 1e-10);
2425 }
2426
2427 #[test]
2428 fn atkinson_epsilon_zero() {
2429 let mut unsorted = Unsorted::new();
2431 unsorted.extend(vec![1, 2, 3, 4, 5]);
2432 let result = unsorted.atkinson(0.0).unwrap();
2433 assert!((result - 0.0).abs() < 1e-10);
2434 }
2435
2436 #[test]
2437 fn atkinson_epsilon_one() {
2438 let mut unsorted = Unsorted::new();
2440 unsorted.extend(vec![1, 2, 3, 4, 5]);
2441 let result = unsorted.atkinson(1.0);
2442 assert!(result.is_some());
2443 }
2444
2445 #[test]
2446 fn atkinson_negative_epsilon() {
2447 let mut unsorted = Unsorted::new();
2448 unsorted.extend(vec![1, 2, 3, 4, 5]);
2449 assert_eq!(unsorted.atkinson(-1.0), None);
2450 }
2451
2452 #[test]
2453 fn atkinson_zero_mean() {
2454 let mut unsorted = Unsorted::new();
2456 unsorted.extend(vec![0, 0, 0, 0]);
2457 assert_eq!(unsorted.atkinson(1.0), None);
2458 }
2459
2460 #[test]
2461 fn atkinson_stream() {
2462 let result = atkinson(vec![1usize, 2, 3, 4, 5].into_iter(), 1.0);
2463 assert!(result.is_some());
2464 }
2465}
2466
2467#[cfg(test)]
2468mod bench {
2469 use super::*;
2470 use std::time::Instant;
2471
2472 #[test]
2473 #[ignore] fn comprehensive_quartiles_benchmark() {
2475 let data_sizes = vec![
2477 1_000, 10_000, 100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000,
2478 ];
2479
2480 println!("=== COMPREHENSIVE QUARTILES BENCHMARK ===\n");
2481
2482 for size in data_sizes {
2483 println!("--- Testing with {} elements ---", size);
2484
2485 let test_patterns = vec![
2487 ("Random", generate_random_data(size)),
2488 ("Reverse Sorted", {
2489 let mut v = Vec::with_capacity(size);
2490 for x in (0..size).rev() {
2491 v.push(x as i32);
2492 }
2493 v
2494 }),
2495 ("Already Sorted", {
2496 let mut v = Vec::with_capacity(size);
2497 for x in 0..size {
2498 v.push(x as i32);
2499 }
2500 v
2501 }),
2502 ("Many Duplicates", {
2503 let mut v = Vec::with_capacity(size);
2505 let chunk_size = size / 100;
2506 for i in 0..100 {
2507 v.extend(std::iter::repeat_n(i, chunk_size));
2508 }
2509 v.extend(std::iter::repeat_n(0, size - v.len()));
2511 v
2512 }),
2513 ];
2514
2515 for (pattern_name, test_data) in test_patterns {
2516 println!("\n Pattern: {}", pattern_name);
2517
2518 let mut unsorted1 = Unsorted::new();
2520 unsorted1.extend(test_data.clone());
2521
2522 let start = Instant::now();
2523 let result_sorted = unsorted1.quartiles();
2524 let sorted_time = start.elapsed();
2525
2526 let mut unsorted2 = Unsorted::new();
2528 unsorted2.extend(test_data.clone());
2529
2530 let start = Instant::now();
2531 let result_selection = unsorted2.quartiles_with_selection();
2532 let selection_time = start.elapsed();
2533
2534 let mut unsorted3 = Unsorted::new();
2536 unsorted3.extend(test_data);
2537
2538 let start = Instant::now();
2539 let result_zero_copy = unsorted3.quartiles_zero_copy();
2540 let zero_copy_time = start.elapsed();
2541
2542 assert_eq!(result_sorted, result_selection);
2544 assert_eq!(result_sorted, result_zero_copy);
2545
2546 let selection_speedup =
2547 sorted_time.as_nanos() as f64 / selection_time.as_nanos() as f64;
2548 let zero_copy_speedup =
2549 sorted_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64;
2550
2551 println!(" Sorting: {:>12?}", sorted_time);
2552 println!(
2553 " Selection: {:>12?} (speedup: {:.2}x)",
2554 selection_time, selection_speedup
2555 );
2556 println!(
2557 " Zero-copy: {:>12?} (speedup: {:.2}x)",
2558 zero_copy_time, zero_copy_speedup
2559 );
2560
2561 let best_algorithm =
2562 if zero_copy_speedup > 1.0 && zero_copy_speedup >= selection_speedup {
2563 "ZERO-COPY"
2564 } else if selection_speedup > 1.0 {
2565 "SELECTION"
2566 } else {
2567 "SORTING"
2568 };
2569 println!(" Best: {}", best_algorithm);
2570 }
2571
2572 println!(); }
2574 }
2575
2576 fn generate_random_data(size: usize) -> Vec<i32> {
2578 let mut rng = 1234567u64;
2580 let mut vec = Vec::with_capacity(size);
2581 for _ in 0..size {
2582 rng = rng.wrapping_mul(1103515245).wrapping_add(12345);
2583 vec.push((rng >> 16) as i32);
2584 }
2585 vec
2586 }
2587
2588 #[test]
2589 #[ignore] fn find_selection_threshold() {
2591 println!("=== FINDING SELECTION ALGORITHM THRESHOLD ===\n");
2592
2593 let mut found_threshold = None;
2595 let test_sizes = vec![
2596 1_000_000, 2_000_000, 3_000_000, 4_000_000, 5_000_000, 7_500_000, 10_000_000,
2597 15_000_000, 20_000_000, 25_000_000, 30_000_000,
2598 ];
2599
2600 for size in test_sizes {
2601 println!("Testing size: {}", size);
2602
2603 let test_data = generate_random_data(size);
2605
2606 let iterations = 3;
2608 let mut sorting_total = 0u128;
2609 let mut selection_total = 0u128;
2610 let mut zero_copy_total = 0u128;
2611
2612 for i in 0..iterations {
2613 println!(" Iteration {}/{}", i + 1, iterations);
2614
2615 let mut unsorted1 = Unsorted::new();
2617 unsorted1.extend(test_data.clone());
2618
2619 let start = Instant::now();
2620 let _result_sorted = unsorted1.quartiles();
2621 sorting_total += start.elapsed().as_nanos();
2622
2623 let mut unsorted2 = Unsorted::new();
2625 unsorted2.extend(test_data.clone());
2626
2627 let start = Instant::now();
2628 let _result_selection = unsorted2.quartiles_with_selection();
2629 selection_total += start.elapsed().as_nanos();
2630
2631 let mut unsorted3 = Unsorted::new();
2633 unsorted3.extend(test_data.clone());
2634
2635 let start = Instant::now();
2636 let _result_zero_copy = unsorted3.quartiles_zero_copy();
2637 zero_copy_total += start.elapsed().as_nanos();
2638 }
2639
2640 let avg_sorting = sorting_total / iterations as u128;
2641 let avg_selection = selection_total / iterations as u128;
2642 let avg_zero_copy = zero_copy_total / iterations as u128;
2643 let selection_speedup = avg_sorting as f64 / avg_selection as f64;
2644 let zero_copy_speedup = avg_sorting as f64 / avg_zero_copy as f64;
2645
2646 println!(
2647 " Average sorting: {:>12.2}ms",
2648 avg_sorting as f64 / 1_000_000.0
2649 );
2650 println!(
2651 " Average selection: {:>12.2}ms (speedup: {:.2}x)",
2652 avg_selection as f64 / 1_000_000.0,
2653 selection_speedup
2654 );
2655 println!(
2656 " Average zero-copy: {:>12.2}ms (speedup: {:.2}x)",
2657 avg_zero_copy as f64 / 1_000_000.0,
2658 zero_copy_speedup
2659 );
2660
2661 if (selection_speedup > 1.0 || zero_copy_speedup > 1.0) && found_threshold.is_none() {
2662 found_threshold = Some(size);
2663 let best_method = if zero_copy_speedup > selection_speedup {
2664 "Zero-copy"
2665 } else {
2666 "Selection"
2667 };
2668 println!(
2669 " *** THRESHOLD FOUND: {} becomes faster at {} elements ***",
2670 best_method, size
2671 );
2672 }
2673
2674 println!();
2675 }
2676
2677 match found_threshold {
2678 Some(threshold) => println!(
2679 "🎯 Selection algorithm becomes faster at approximately {} elements",
2680 threshold
2681 ),
2682 None => println!("❌ Selection algorithm did not become faster in the tested range"),
2683 }
2684 }
2685
2686 #[test]
2687 #[ignore] fn benchmark_different_data_types() {
2689 println!("=== BENCHMARKING DIFFERENT DATA TYPES ===\n");
2690
2691 let size = 5_000_000; println!("Testing with f64 data:");
2695 let float_data: Vec<f64> = generate_random_data(size)
2696 .into_iter()
2697 .map(|x| x as f64 / 1000.0)
2698 .collect();
2699
2700 let mut unsorted1 = Unsorted::new();
2701 unsorted1.extend(float_data.clone());
2702 let start = Instant::now();
2703 let _result = unsorted1.quartiles();
2704 let sorting_time = start.elapsed();
2705
2706 let mut unsorted2 = Unsorted::new();
2707 unsorted2.extend(float_data.clone());
2708 let start = Instant::now();
2709 let _result = unsorted2.quartiles_with_selection();
2710 let selection_time = start.elapsed();
2711
2712 let mut unsorted3 = Unsorted::new();
2713 unsorted3.extend(float_data);
2714 let start = Instant::now();
2715 let _result = unsorted3.quartiles_zero_copy();
2716 let zero_copy_time = start.elapsed();
2717
2718 println!(" Sorting: {:?}", sorting_time);
2719 println!(" Selection: {:?}", selection_time);
2720 println!(" Zero-copy: {:?}", zero_copy_time);
2721 println!(
2722 " Selection Speedup: {:.2}x",
2723 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2724 );
2725 println!(
2726 " Zero-copy Speedup: {:.2}x\n",
2727 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2728 );
2729
2730 println!("Testing with i64 data:");
2732 let int64_data: Vec<i64> = generate_random_data(size)
2733 .into_iter()
2734 .map(|x| x as i64 * 1000)
2735 .collect();
2736
2737 let mut unsorted1 = Unsorted::new();
2738 unsorted1.extend(int64_data.clone());
2739 let start = Instant::now();
2740 let _result = unsorted1.quartiles();
2741 let sorting_time = start.elapsed();
2742
2743 let mut unsorted2 = Unsorted::new();
2744 unsorted2.extend(int64_data.clone());
2745 let start = Instant::now();
2746 let _result = unsorted2.quartiles_with_selection();
2747 let selection_time = start.elapsed();
2748
2749 let mut unsorted3 = Unsorted::new();
2750 unsorted3.extend(int64_data);
2751 let start = Instant::now();
2752 let _result = unsorted3.quartiles_zero_copy();
2753 let zero_copy_time = start.elapsed();
2754
2755 println!(" Sorting: {:?}", sorting_time);
2756 println!(" Selection: {:?}", selection_time);
2757 println!(" Zero-copy: {:?}", zero_copy_time);
2758 println!(
2759 " Selection Speedup: {:.2}x",
2760 sorting_time.as_nanos() as f64 / selection_time.as_nanos() as f64
2761 );
2762 println!(
2763 " Zero-copy Speedup: {:.2}x",
2764 sorting_time.as_nanos() as f64 / zero_copy_time.as_nanos() as f64
2765 );
2766 }
2767}