1use std::cmp::Ordering;
15
16use minarrow::{Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64};
17use num_traits::{Float, NumCast, Zero};
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
21pub enum SortAlgorithm {
22 #[default]
28 Auto,
29 Comparison,
34 Radix,
39 #[cfg(feature = "simd_sort")]
44 Simd,
45}
46
47#[derive(Debug, Clone)]
49pub struct ArgsortConfig {
50 pub descending: bool,
51 pub algorithm: SortAlgorithm,
52 pub stable: bool,
54 pub parallel: bool,
56}
57
58impl Default for ArgsortConfig {
59 fn default() -> Self {
60 Self {
61 descending: false,
62 algorithm: SortAlgorithm::default(),
63 stable: true,
64 parallel: false,
65 }
66 }
67}
68
69impl ArgsortConfig {
70 pub fn new() -> Self {
72 Self::default()
73 }
74
75 pub fn descending(mut self, descending: bool) -> Self {
77 self.descending = descending;
78 self
79 }
80
81 pub fn algorithm(mut self, algorithm: SortAlgorithm) -> Self {
83 self.algorithm = algorithm;
84 self
85 }
86
87 pub fn stable(mut self, stable: bool) -> Self {
89 self.stable = stable;
90 self
91 }
92
93 pub fn parallel(mut self, parallel: bool) -> Self {
95 self.parallel = parallel;
96 self
97 }
98}
99
100#[inline]
107pub fn sort_float<T: Float>(slice: &mut [T]) {
108 slice.sort_unstable_by(total_cmp_f);
109}
110#[inline(always)]
115pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
116 if std::mem::size_of::<T>() == 4 {
118 let ua = unsafe { *(a as *const T as *const u32) };
120 let ub = unsafe { *(b as *const T as *const u32) };
121 let ka = if ua & 0x8000_0000 != 0 {
123 !ua
124 } else {
125 ua ^ 0x8000_0000
126 };
127 let kb = if ub & 0x8000_0000 != 0 {
128 !ub
129 } else {
130 ub ^ 0x8000_0000
131 };
132 ka.cmp(&kb)
133 } else if std::mem::size_of::<T>() == 8 {
134 let ua = unsafe { *(a as *const T as *const u64) };
136 let ub = unsafe { *(b as *const T as *const u64) };
137 let ka = if ua & 0x8000_0000_0000_0000 != 0 {
138 !ua
139 } else {
140 ua ^ 0x8000_0000_0000_0000
141 };
142 let kb = if ub & 0x8000_0000_0000_0000 != 0 {
143 !ub
144 } else {
145 ub ^ 0x8000_0000_0000_0000
146 };
147 ka.cmp(&kb)
148 } else {
149 unreachable!("Only f32 and f64 are valid Float types.")
150 }
151}
152
153#[inline]
157pub fn sorted_float_to<T: Float>(data: &[T], out: &mut [T]) {
158 assert_eq!(
159 data.len(),
160 out.len(),
161 "sorted_float_to: input/output length mismatch"
162 );
163 out.copy_from_slice(data);
164 sort_float(out);
165}
166
167pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
168 let mut v = Vec64::from_slice(data);
169 sort_float(&mut v);
170 v
171}
172
173#[inline]
193pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
194 slice.sort_unstable();
195}
196
197#[inline]
224pub fn sorted_int_to<T: Ord + Copy>(data: &[T], out: &mut [T]) {
225 assert_eq!(
226 data.len(),
227 out.len(),
228 "sorted_int_to: input/output length mismatch"
229 );
230 out.copy_from_slice(data);
231 sort_int(out);
232}
233
234pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
235 let mut v = Vec64::from_slice(data);
236 sort_int(&mut v);
237 v
238}
239
240#[inline]
257pub fn sort_str(slice: &mut [&str]) {
258 slice.sort_unstable();
259}
260
261pub fn sorted_str(data: &[&str]) -> Vec64<String> {
298 let mut v = Vec64::from_slice(data);
299 sort_str(&mut v);
300 v.iter().map(|s| s.to_string()).collect()
301}
302
303pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
306 let n = offsets.len() - 1;
307 let mut indices: Vec<usize> = (0..n).collect();
308
309 indices.sort_unstable_by(|&i, &j| {
310 let a = &values[offsets[i]..offsets[i + 1]];
311 let b = &values[offsets[j]..offsets[j + 1]];
312 a.cmp(b)
313 });
314
315 let total_bytes: usize = indices
317 .iter()
318 .map(|&idx| offsets[idx + 1] - offsets[idx])
319 .sum();
320
321 let mut new_values = String::with_capacity(total_bytes);
323 let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
324
325 unsafe {
327 new_offsets.set_len(n + 1);
328 }
329 unsafe {
330 new_values.as_mut_vec().set_len(total_bytes);
331 }
332
333 let values_bytes = values.as_bytes();
334 let out_bytes = unsafe { new_values.as_mut_vec() };
335
336 unsafe {
338 *new_offsets.get_unchecked_mut(0) = 0;
339 }
340 let mut current_offset = 0;
341 let mut out_ptr = 0;
342 for (i, &idx) in indices.iter().enumerate() {
343 let start = offsets[idx];
344 let end = offsets[idx + 1];
345 let len = end - start;
346 unsafe {
348 std::ptr::copy_nonoverlapping(
349 values_bytes.as_ptr().add(start),
350 out_bytes.as_mut_ptr().add(out_ptr),
351 len,
352 );
353 current_offset += len;
354 *new_offsets.get_unchecked_mut(i + 1) = current_offset;
355 }
356 out_ptr += len;
357 }
358 unsafe {
360 new_values.as_mut_vec().set_len(current_offset);
361 }
362
363 (new_offsets, new_values)
364}
365
366pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
369 cat: &CategoricalArray<T>,
370) -> (Vec64<T>, Option<Bitmask>) {
371 let len = cat.data.len();
372 let mut indices: Vec<usize> = (0..len).collect();
373
374 indices.sort_by(|&i, &j| {
376 let a_is_null = cat.is_null(i);
377 let b_is_null = cat.is_null(j);
378 match (a_is_null, b_is_null) {
379 (true, true) => Ordering::Equal,
380 (true, false) => Ordering::Less,
381 (false, true) => Ordering::Greater,
382 (false, false) => {
383 let a_key = &cat.unique_values[cat.data[i].to_usize()];
385 let b_key = &cat.unique_values[cat.data[j].to_usize()];
386 a_key.cmp(b_key)
387 }
388 }
389 });
390
391 let mut sorted_data = Vec64::<T>::with_capacity(len);
393 let mut sorted_mask = cat
394 .null_mask
395 .as_ref()
396 .map(|_| Bitmask::new_set_all(len, false));
397
398 for (out_i, &orig_i) in indices.iter().enumerate() {
399 sorted_data.push(cat.data[orig_i]);
400 if let Some(ref mask) = cat.null_mask {
401 if let Some(ref mut sm) = sorted_mask {
402 if unsafe { mask.get_unchecked(orig_i) } {
403 unsafe { sm.set_unchecked(out_i, true) };
404 }
405 }
406 }
407 }
408
409 (sorted_data, sorted_mask)
410}
411
412#[inline]
414fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
415 let mut out = Vec64::with_capacity(mask.len);
416 for i in 0..mask.len {
417 out.push(unsafe { mask.get_unchecked(i) });
418 }
419 out
420}
421
422#[inline]
424fn pack_bitmask(bits: &[bool]) -> Bitmask {
425 let mut mask = Bitmask::new_set_all(bits.len(), false);
426 for (i, &b) in bits.iter().enumerate() {
427 if b {
428 unsafe { mask.set_unchecked(i, true) };
429 }
430 }
431 mask
432}
433
434pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
437 let bits: Vec64<bool> = unpack_bitmask(&arr.data);
438 let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
439
440 let mut indices: Vec<usize> = (0..arr.len).collect();
441
442 indices.sort_unstable_by(|&i, &j| {
444 let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
445 let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
446
447 match (a_null, b_null) {
448 (true, true) => Ordering::Equal, (true, false) => Ordering::Less, (false, true) => Ordering::Greater, (false, false) => {
452 match (bits[i], bits[j]) {
454 (false, false) => Ordering::Equal,
455 (false, true) => Ordering::Less,
456 (true, false) => Ordering::Greater,
457 (true, true) => Ordering::Equal,
458 }
459 }
460 }
461 });
462
463 let sorted_bits: Vec<bool> = indices
465 .iter()
466 .map(|&idx| {
467 let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
468 if is_null { false } else { bits[idx] }
469 })
470 .collect();
471 arr.data = pack_bitmask(&sorted_bits);
472
473 if let Some(null_mask) = arr.null_mask.as_mut() {
475 let sorted_nulls: Vec<bool> = indices
476 .iter()
477 .map(|&idx| nulls.as_ref().unwrap()[idx])
478 .collect();
479 *null_mask = pack_bitmask(&sorted_nulls);
480 }
481}
482
483pub fn sort_slice_with_mask<T: Ord + Copy>(
485 data: &[T],
486 mask: Option<&Bitmask>,
487) -> (Vec64<T>, Option<Bitmask>) {
488 let n = data.len();
489 let mut indices: Vec<usize> = (0..n).collect();
490 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
491
492 let mut sorted_data = Vec64::<T>::with_capacity(n);
493 unsafe {
494 sorted_data.set_len(n);
495 }
496 for (i, &idx) in indices.iter().enumerate() {
497 unsafe {
498 *sorted_data.get_unchecked_mut(i) = data[idx];
499 }
500 }
501
502 let sorted_mask = mask.map(|m| {
503 let mut out = Bitmask::new_set_all(n, false);
504 for (i, &idx) in indices.iter().enumerate() {
505 if unsafe { m.get_unchecked(idx) } {
506 unsafe { out.set_unchecked(i, true) };
507 }
508 }
509 out
510 });
511
512 (sorted_data, sorted_mask)
513}
514
515#[inline]
519pub fn argsort<T: Ord>(data: &[T], descending: bool) -> Vec<usize> {
520 argsort_with_stability(data, descending, false)
521}
522
523#[inline]
525pub fn argsort_with_stability<T: Ord>(data: &[T], descending: bool, stable: bool) -> Vec<usize> {
526 let n = data.len();
527 if n == 0 {
528 return vec![];
529 }
530
531 let mut indices: Vec<usize> = (0..n).collect();
532 if stable {
533 if descending {
534 indices.sort_by(|&i, &j| data[j].cmp(&data[i]));
535 } else {
536 indices.sort_by(|&i, &j| data[i].cmp(&data[j]));
537 }
538 } else {
539 if descending {
540 indices.sort_unstable_by(|&i, &j| data[j].cmp(&data[i]));
541 } else {
542 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
543 }
544 }
545 indices
546}
547
548#[inline]
550pub fn argsort_float<T: Float>(data: &[T], descending: bool) -> Vec<usize> {
551 argsort_float_with_stability(data, descending, false)
552}
553
554#[inline]
556pub fn argsort_float_with_stability<T: Float>(
557 data: &[T],
558 descending: bool,
559 stable: bool,
560) -> Vec<usize> {
561 let n = data.len();
562 if n == 0 {
563 return vec![];
564 }
565
566 let mut indices: Vec<usize> = (0..n).collect();
567 if stable {
568 if descending {
569 indices.sort_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
570 } else {
571 indices.sort_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
572 }
573 } else {
574 if descending {
575 indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
576 } else {
577 indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
578 }
579 }
580 indices
581}
582
583#[inline]
585pub fn argsort_str(data: &[&str], descending: bool) -> Vec<usize> {
586 argsort_str_with_stability(data, descending, false)
587}
588
589#[inline]
591pub fn argsort_str_with_stability(data: &[&str], descending: bool, stable: bool) -> Vec<usize> {
592 let n = data.len();
593 if n == 0 {
594 return vec![];
595 }
596
597 let mut indices: Vec<usize> = (0..n).collect();
598 if stable {
599 if descending {
600 indices.sort_by(|&i, &j| data[j].cmp(data[i]));
601 } else {
602 indices.sort_by(|&i, &j| data[i].cmp(data[j]));
603 }
604 } else {
605 if descending {
606 indices.sort_unstable_by(|&i, &j| data[j].cmp(data[i]));
607 } else {
608 indices.sort_unstable_by(|&i, &j| data[i].cmp(data[j]));
609 }
610 }
611 indices
612}
613
614pub fn argsort_string_array(offsets: &[usize], values: &str, descending: bool) -> Vec<usize> {
618 argsort_string_array_with_stability(offsets, values, descending, false)
619}
620
621pub fn argsort_string_array_with_stability(
623 offsets: &[usize],
624 values: &str,
625 descending: bool,
626 stable: bool,
627) -> Vec<usize> {
628 let n = if offsets.is_empty() {
629 0
630 } else {
631 offsets.len() - 1
632 };
633 if n == 0 {
634 return vec![];
635 }
636
637 let mut indices: Vec<usize> = (0..n).collect();
638 let cmp = |i: &usize, j: &usize| {
639 let a = &values[offsets[*i]..offsets[*i + 1]];
640 let b = &values[offsets[*j]..offsets[*j + 1]];
641 if descending { b.cmp(a) } else { a.cmp(b) }
642 };
643 if stable {
644 indices.sort_by(cmp);
645 } else {
646 indices.sort_unstable_by(cmp);
647 }
648 indices
649}
650
651pub fn argsort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
656 cat: &CategoricalArray<T>,
657 descending: bool,
658) -> Vec<usize> {
659 let len = cat.data.len();
660 if len == 0 {
661 return vec![];
662 }
663
664 let mut indices: Vec<usize> = (0..len).collect();
665
666 indices.sort_by(|&i, &j| {
667 let a_is_null = cat.is_null(i);
668 let b_is_null = cat.is_null(j);
669
670 let ordering = match (a_is_null, b_is_null) {
671 (true, true) => Ordering::Equal,
672 (true, false) => Ordering::Less, (false, true) => Ordering::Greater,
674 (false, false) => {
675 let a_key = &cat.unique_values[cat.data[i].to_usize()];
676 let b_key = &cat.unique_values[cat.data[j].to_usize()];
677 a_key.cmp(b_key)
678 }
679 };
680
681 if descending {
682 ordering.reverse()
683 } else {
684 ordering
685 }
686 });
687
688 indices
689}
690
691pub fn argsort_categorical_custom<T: Ord + Copy + Zero + NumCast + Integer>(
697 cat: &CategoricalArray<T>,
698 category_order: &[&str],
699 descending: bool,
700) -> Vec<usize> {
701 let len = cat.data.len();
702 if len == 0 {
703 return vec![];
704 }
705
706 use std::collections::HashMap;
708 let order_map: HashMap<&str, usize> = category_order
709 .iter()
710 .enumerate()
711 .map(|(i, &s)| (s, i))
712 .collect();
713
714 let mut indices: Vec<usize> = (0..len).collect();
715
716 indices.sort_by(|&i, &j| {
717 let a_is_null = cat.is_null(i);
718 let b_is_null = cat.is_null(j);
719
720 let ordering = match (a_is_null, b_is_null) {
721 (true, true) => Ordering::Equal,
722 (true, false) => Ordering::Less,
723 (false, true) => Ordering::Greater,
724 (false, false) => {
725 let a_key = &cat.unique_values[cat.data[i].to_usize()];
726 let b_key = &cat.unique_values[cat.data[j].to_usize()];
727
728 let a_pos = order_map.get(a_key.as_str());
730 let b_pos = order_map.get(b_key.as_str());
731
732 match (a_pos, b_pos) {
733 (Some(ap), Some(bp)) => ap.cmp(bp),
734 (Some(_), None) => Ordering::Less, (None, Some(_)) => Ordering::Greater,
736 (None, None) => a_key.cmp(b_key), }
738 }
739 };
740
741 if descending {
742 ordering.reverse()
743 } else {
744 ordering
745 }
746 });
747
748 indices
749}
750
751pub fn argsort_boolean(
755 data: &Bitmask,
756 null_mask: Option<&Bitmask>,
757 descending: bool,
758) -> Vec<usize> {
759 let n = data.len;
760 if n == 0 {
761 return vec![];
762 }
763
764 let mut indices: Vec<usize> = (0..n).collect();
765
766 indices.sort_unstable_by(|&i, &j| {
767 let a_null = null_mask.map_or(false, |m| !m.get(i));
768 let b_null = null_mask.map_or(false, |m| !m.get(j));
769
770 let ordering = match (a_null, b_null) {
771 (true, true) => Ordering::Equal,
772 (true, false) => Ordering::Less,
773 (false, true) => Ordering::Greater,
774 (false, false) => {
775 let a_val = data.get(i);
776 let b_val = data.get(j);
777 a_val.cmp(&b_val)
778 }
779 };
780
781 if descending {
782 ordering.reverse()
783 } else {
784 ordering
785 }
786 });
787
788 indices
789}
790
791pub fn argsort_radix_u32(data: &[u32], descending: bool) -> Vec<usize> {
798 let n = data.len();
799 if n == 0 {
800 return vec![];
801 }
802
803 let mut indices: Vec<usize> = (0..n).collect();
804 let mut temp_indices = vec![0usize; n];
805
806 for shift in (0..32).step_by(8) {
808 let mut counts = [0usize; 256];
809 for &idx in &indices {
810 let byte = ((data[idx] >> shift) & 0xFF) as usize;
811 counts[byte] += 1;
812 }
813
814 let mut positions = [0usize; 256];
815 if descending {
816 let mut sum = 0;
817 for i in (0..256).rev() {
818 positions[i] = sum;
819 sum += counts[i];
820 }
821 } else {
822 let mut sum = 0;
823 for i in 0..256 {
824 positions[i] = sum;
825 sum += counts[i];
826 }
827 }
828
829 for &idx in &indices {
830 let byte = ((data[idx] >> shift) & 0xFF) as usize;
831 temp_indices[positions[byte]] = idx;
832 positions[byte] += 1;
833 }
834
835 std::mem::swap(&mut indices, &mut temp_indices);
836 }
837
838 indices
839}
840
841pub fn argsort_radix_u64(data: &[u64], descending: bool) -> Vec<usize> {
843 let n = data.len();
844 if n == 0 {
845 return vec![];
846 }
847
848 let mut indices: Vec<usize> = (0..n).collect();
849 let mut temp_indices = vec![0usize; n];
850
851 for shift in (0..64).step_by(8) {
852 let mut counts = [0usize; 256];
853 for &idx in &indices {
854 let byte = ((data[idx] >> shift) & 0xFF) as usize;
855 counts[byte] += 1;
856 }
857
858 let mut positions = [0usize; 256];
859 if descending {
860 let mut sum = 0;
861 for i in (0..256).rev() {
862 positions[i] = sum;
863 sum += counts[i];
864 }
865 } else {
866 let mut sum = 0;
867 for i in 0..256 {
868 positions[i] = sum;
869 sum += counts[i];
870 }
871 }
872
873 for &idx in &indices {
874 let byte = ((data[idx] >> shift) & 0xFF) as usize;
875 temp_indices[positions[byte]] = idx;
876 positions[byte] += 1;
877 }
878
879 std::mem::swap(&mut indices, &mut temp_indices);
880 }
881
882 indices
883}
884
885pub fn argsort_radix_i32(data: &[i32], descending: bool) -> Vec<usize> {
887 let unsigned: Vec<u32> = data.iter().map(|&x| (x as u32) ^ 0x8000_0000).collect();
889 argsort_radix_u32(&unsigned, descending)
890}
891
892pub fn argsort_radix_i64(data: &[i64], descending: bool) -> Vec<usize> {
894 let unsigned: Vec<u64> = data
895 .iter()
896 .map(|&x| (x as u64) ^ 0x8000_0000_0000_0000)
897 .collect();
898 argsort_radix_u64(&unsigned, descending)
899}
900
901#[cfg(feature = "simd_sort")]
904pub mod simd_argsort {
905 use std::cmp::Ordering;
906 use voracious_radix_sort::{RadixSort, Radixable};
907
908 #[derive(Copy, Clone, Debug)]
910 struct IndexedU32 {
911 value: u32,
912 index: usize,
913 }
914
915 impl PartialOrd for IndexedU32 {
916 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
917 self.value.partial_cmp(&other.value)
918 }
919 }
920
921 impl PartialEq for IndexedU32 {
922 fn eq(&self, other: &Self) -> bool {
923 self.value == other.value
924 }
925 }
926
927 impl Radixable<u32> for IndexedU32 {
928 type Key = u32;
929 #[inline]
930 fn key(&self) -> Self::Key {
931 self.value
932 }
933 }
934
935 #[derive(Copy, Clone, Debug)]
937 struct IndexedU64 {
938 value: u64,
939 index: usize,
940 }
941
942 impl PartialOrd for IndexedU64 {
943 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
944 self.value.partial_cmp(&other.value)
945 }
946 }
947
948 impl PartialEq for IndexedU64 {
949 fn eq(&self, other: &Self) -> bool {
950 self.value == other.value
951 }
952 }
953
954 impl Radixable<u64> for IndexedU64 {
955 type Key = u64;
956 #[inline]
957 fn key(&self) -> Self::Key {
958 self.value
959 }
960 }
961
962 #[derive(Copy, Clone, Debug)]
964 struct IndexedI32 {
965 value: i32,
966 index: usize,
967 }
968
969 impl PartialOrd for IndexedI32 {
970 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
971 self.value.partial_cmp(&other.value)
972 }
973 }
974
975 impl PartialEq for IndexedI32 {
976 fn eq(&self, other: &Self) -> bool {
977 self.value == other.value
978 }
979 }
980
981 impl Radixable<i32> for IndexedI32 {
982 type Key = i32;
983 #[inline]
984 fn key(&self) -> Self::Key {
985 self.value
986 }
987 }
988
989 #[derive(Copy, Clone, Debug)]
991 struct IndexedI64 {
992 value: i64,
993 index: usize,
994 }
995
996 impl PartialOrd for IndexedI64 {
997 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
998 self.value.partial_cmp(&other.value)
999 }
1000 }
1001
1002 impl PartialEq for IndexedI64 {
1003 fn eq(&self, other: &Self) -> bool {
1004 self.value == other.value
1005 }
1006 }
1007
1008 impl Radixable<i64> for IndexedI64 {
1009 type Key = i64;
1010 #[inline]
1011 fn key(&self) -> Self::Key {
1012 self.value
1013 }
1014 }
1015
1016 pub fn argsort_simd_u32(data: &[u32], descending: bool) -> Vec<usize> {
1018 let n = data.len();
1019 if n == 0 {
1020 return vec![];
1021 }
1022
1023 let mut indexed: Vec<IndexedU32> = data
1024 .iter()
1025 .enumerate()
1026 .map(|(index, &value)| IndexedU32 { value, index })
1027 .collect();
1028
1029 indexed.voracious_sort();
1030
1031 if descending {
1032 indexed.iter().rev().map(|x| x.index).collect()
1033 } else {
1034 indexed.iter().map(|x| x.index).collect()
1035 }
1036 }
1037
1038 pub fn argsort_simd_u64(data: &[u64], descending: bool) -> Vec<usize> {
1040 let n = data.len();
1041 if n == 0 {
1042 return vec![];
1043 }
1044
1045 let mut indexed: Vec<IndexedU64> = data
1046 .iter()
1047 .enumerate()
1048 .map(|(index, &value)| IndexedU64 { value, index })
1049 .collect();
1050
1051 indexed.voracious_sort();
1052
1053 if descending {
1054 indexed.iter().rev().map(|x| x.index).collect()
1055 } else {
1056 indexed.iter().map(|x| x.index).collect()
1057 }
1058 }
1059
1060 pub fn argsort_simd_i32(data: &[i32], descending: bool) -> Vec<usize> {
1062 let n = data.len();
1063 if n == 0 {
1064 return vec![];
1065 }
1066
1067 let mut indexed: Vec<IndexedI32> = data
1068 .iter()
1069 .enumerate()
1070 .map(|(index, &value)| IndexedI32 { value, index })
1071 .collect();
1072
1073 indexed.voracious_sort();
1074
1075 if descending {
1076 indexed.iter().rev().map(|x| x.index).collect()
1077 } else {
1078 indexed.iter().map(|x| x.index).collect()
1079 }
1080 }
1081
1082 pub fn argsort_simd_i64(data: &[i64], descending: bool) -> Vec<usize> {
1084 let n = data.len();
1085 if n == 0 {
1086 return vec![];
1087 }
1088
1089 let mut indexed: Vec<IndexedI64> = data
1090 .iter()
1091 .enumerate()
1092 .map(|(index, &value)| IndexedI64 { value, index })
1093 .collect();
1094
1095 indexed.voracious_sort();
1096
1097 if descending {
1098 indexed.iter().rev().map(|x| x.index).collect()
1099 } else {
1100 indexed.iter().map(|x| x.index).collect()
1101 }
1102 }
1103}
1104
1105pub fn argsort_auto_i32(data: &[i32], config: &ArgsortConfig) -> Vec<usize> {
1113 match config.algorithm {
1114 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1115 SortAlgorithm::Radix => argsort_radix_i32(data, config.descending),
1116 #[cfg(feature = "simd_sort")]
1117 SortAlgorithm::Simd => simd_argsort::argsort_simd_i32(data, config.descending),
1118 SortAlgorithm::Auto => {
1119 if config.stable {
1120 return argsort_with_stability(data, config.descending, true);
1121 }
1122 #[cfg(feature = "simd_sort")]
1123 {
1124 simd_argsort::argsort_simd_i32(data, config.descending)
1125 }
1126 #[cfg(not(feature = "simd_sort"))]
1127 {
1128 argsort_radix_i32(data, config.descending)
1129 }
1130 }
1131 }
1132}
1133
1134pub fn argsort_auto_i64(data: &[i64], config: &ArgsortConfig) -> Vec<usize> {
1136 match config.algorithm {
1137 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1138 SortAlgorithm::Radix => argsort_radix_i64(data, config.descending),
1139 #[cfg(feature = "simd_sort")]
1140 SortAlgorithm::Simd => simd_argsort::argsort_simd_i64(data, config.descending),
1141 SortAlgorithm::Auto => {
1142 if config.stable {
1143 return argsort_with_stability(data, config.descending, true);
1144 }
1145 #[cfg(feature = "simd_sort")]
1146 {
1147 simd_argsort::argsort_simd_i64(data, config.descending)
1148 }
1149 #[cfg(not(feature = "simd_sort"))]
1150 {
1151 argsort_radix_i64(data, config.descending)
1152 }
1153 }
1154 }
1155}
1156
1157pub fn argsort_auto_u32(data: &[u32], config: &ArgsortConfig) -> Vec<usize> {
1159 match config.algorithm {
1160 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1161 SortAlgorithm::Radix => argsort_radix_u32(data, config.descending),
1162 #[cfg(feature = "simd_sort")]
1163 SortAlgorithm::Simd => simd_argsort::argsort_simd_u32(data, config.descending),
1164 SortAlgorithm::Auto => {
1165 if config.stable {
1166 return argsort_with_stability(data, config.descending, true);
1167 }
1168 #[cfg(feature = "simd_sort")]
1169 {
1170 simd_argsort::argsort_simd_u32(data, config.descending)
1171 }
1172 #[cfg(not(feature = "simd_sort"))]
1173 {
1174 argsort_radix_u32(data, config.descending)
1175 }
1176 }
1177 }
1178}
1179
1180pub fn argsort_auto_u64(data: &[u64], config: &ArgsortConfig) -> Vec<usize> {
1182 match config.algorithm {
1183 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1184 SortAlgorithm::Radix => argsort_radix_u64(data, config.descending),
1185 #[cfg(feature = "simd_sort")]
1186 SortAlgorithm::Simd => simd_argsort::argsort_simd_u64(data, config.descending),
1187 SortAlgorithm::Auto => {
1188 if config.stable {
1189 return argsort_with_stability(data, config.descending, true);
1190 }
1191 #[cfg(feature = "simd_sort")]
1192 {
1193 simd_argsort::argsort_simd_u64(data, config.descending)
1194 }
1195 #[cfg(not(feature = "simd_sort"))]
1196 {
1197 argsort_radix_u64(data, config.descending)
1198 }
1199 }
1200 }
1201}
1202
1203#[cfg(test)]
1204mod tests {
1205 use minarrow::vec64;
1206
1207 use super::*;
1208
1209 #[test]
1210 fn test_total_cmp_f32_ordering_basic() {
1211 let a = 1.0f32;
1212 let b = 2.0f32;
1213 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
1214 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
1215 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
1216 }
1217
1218 #[test]
1219 fn test_total_cmp_f64_ordering_basic() {
1220 let a = 1.0f64;
1221 let b = 2.0f64;
1222 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
1223 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
1224 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
1225 }
1226
1227 #[test]
1228 fn test_total_cmp_zero_and_negzero() {
1229 let pz = 0.0f32;
1230 let nz = -0.0f32;
1231 assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
1233 assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
1234 }
1235
1236 #[test]
1237 fn test_total_cmp_nan_ordering_f32() {
1238 let nan = f32::NAN;
1239 let pos_inf = f32::INFINITY;
1240 let neg_inf = f32::NEG_INFINITY;
1241 let one = 1.0f32;
1242
1243 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
1245 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
1246 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
1247 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
1249 }
1250
1251 #[test]
1252 fn test_total_cmp_nan_ordering_f64() {
1253 let nan = f64::NAN;
1254 let pos_inf = f64::INFINITY;
1255 let neg_inf = f64::NEG_INFINITY;
1256 let one = 1.0f64;
1257
1258 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
1259 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
1260 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
1261 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
1262 }
1263
1264 #[test]
1265 fn test_sort_float_f32_all_edge_cases() {
1266 let mut v = vec64![
1267 3.0f32,
1268 -0.0,
1269 0.0,
1270 f32::INFINITY,
1271 f32::NEG_INFINITY,
1272 1.0,
1273 -1.0,
1274 f32::NAN,
1275 2.0,
1276 -2.0,
1277 ];
1278 sort_float(&mut v);
1279 assert_eq!(v[0], f32::NEG_INFINITY);
1282 assert_eq!(v[1], -2.0);
1283 assert_eq!(v[2], -1.0);
1284 assert_eq!(v[3], -0.0);
1285 assert_eq!(v[4], 0.0);
1286 assert_eq!(v[5], 1.0);
1287 assert_eq!(v[6], 2.0);
1288 assert_eq!(v[7], 3.0);
1289 assert_eq!(v[8], f32::INFINITY);
1290 assert!(v[9].is_nan());
1291 }
1292
1293 #[test]
1294 fn test_sort_float_f64_all_edge_cases() {
1295 let mut v = vec64![
1296 3.0f64,
1297 -0.0,
1298 0.0,
1299 f64::INFINITY,
1300 f64::NEG_INFINITY,
1301 1.0,
1302 -1.0,
1303 f64::NAN,
1304 2.0,
1305 -2.0,
1306 ];
1307 sort_float(&mut v);
1308 assert_eq!(v[0], f64::NEG_INFINITY);
1309 assert_eq!(v[1], -2.0);
1310 assert_eq!(v[2], -1.0);
1311 assert_eq!(v[3], -0.0);
1312 assert_eq!(v[4], 0.0);
1313 assert_eq!(v[5], 1.0);
1314 assert_eq!(v[6], 2.0);
1315 assert_eq!(v[7], 3.0);
1316 assert_eq!(v[8], f64::INFINITY);
1317 assert!(v[9].is_nan());
1318 }
1319
1320 #[test]
1321 fn test_sorted_float_immutability_and_return_type() {
1322 let v = vec64![1.0f32, 2.0, 3.0];
1323 let out = sorted_float(&v);
1324 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
1325 assert_eq!(*v, [1.0, 2.0, 3.0]);
1327 }
1328
1329 #[test]
1330 fn test_sorted_float_correct_for_f64() {
1331 let v = vec64![3.0f64, 2.0, 1.0];
1332 let out = sorted_float(&v);
1333 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
1334 }
1335
1336 #[test]
1337 fn test_sort_float_empty_and_single() {
1338 let mut v: [f32; 0] = [];
1339 sort_float(&mut v);
1340 let mut v2 = [42.0f32];
1341 sort_float(&mut v2);
1342 assert_eq!(v2, [42.0]);
1343 }
1344
1345 #[cfg(test)]
1346 mod tests {
1347 use super::*;
1348 use minarrow::{Vec64, vec64};
1349
1350 #[test]
1351 fn test_sort_int_empty_and_single() {
1352 let mut arr: [i32; 0] = [];
1353 sort_int(&mut arr);
1354 assert_eq!(arr, [] as [i32; 0]);
1355 let mut arr2 = vec64![42];
1356 sort_int(&mut arr2);
1357 assert_eq!(*arr2, [42]);
1358 }
1359
1360 #[test]
1361 fn test_sort_int_order() {
1362 let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
1363 sort_int(&mut arr);
1364 assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
1365 }
1366
1367 #[test]
1368 fn test_sorted_int_immutability_and_output() {
1369 let arr = vec64![5, 3, 7, 1, 2];
1370 let sorted = sorted_int(&arr);
1371 assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
1372 assert_eq!(*arr, [5, 3, 7, 1, 2]);
1374 }
1375
1376 #[test]
1377 fn test_sort_str_basic() {
1378 let mut arr = vec64!["z", "b", "a", "d"];
1379 sort_str(&mut arr);
1380 assert_eq!(*arr, ["a", "b", "d", "z"]);
1381 }
1382
1383 #[test]
1384 fn test_sorted_str_and_non_ascii() {
1385 let arr = vec64!["z", "ä", "b", "a", "d"];
1386 let sorted = sorted_str(&arr);
1387 assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
1390 }
1391
1392 #[test]
1393 fn test_sort_string_array_basic() {
1394 let offsets = vec![0, 1, 3, 5, 5, 6]; let values = "abcde".to_string() + "f";
1396 let (new_offsets, new_values) = sort_string_array(&offsets, &values);
1397 let slices: Vec<_> = new_offsets
1399 .windows(2)
1400 .map(|w| &new_values[w[0]..w[1]])
1401 .collect();
1402 assert_eq!(slices, &["", "a", "bc", "de", "f"]);
1403 }
1404
1405 #[test]
1406 fn test_sort_categorical_lexical_basic_and_nulls() {
1407 let unique = Vec64::from_slice_clone(&[
1409 "apple".to_string(),
1410 "banana".to_string(),
1411 "pear".to_string(),
1412 ]);
1413 let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
1414 let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
1415 let cat = CategoricalArray {
1416 data: data.into(),
1417 unique_values: unique,
1418 null_mask: Some(mask.clone()),
1419 };
1420 let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
1421
1422 let mask_out = sorted_mask.unwrap();
1424 let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
1425 assert_eq!(null_pos, &[0, 1]);
1426
1427 let sorted_as_str: Vec<_> = sorted_data
1429 .iter()
1430 .map(|&k| &cat.unique_values[k.to_usize()][..])
1431 .collect();
1432 let vals = &sorted_as_str[null_pos.len()..];
1433 assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
1434 }
1435
1436 #[test]
1437 fn test_sort_categorical_all_nulls_and_no_nulls() {
1438 let unique = Vec64::from_slice_clone(&["x".to_string()]);
1440 let data = Vec64::from_slice(&[0u8, 0, 0]);
1441 let mask = Bitmask::from_bools(&[false, false, false]);
1442 let cat = CategoricalArray {
1443 data: data.clone().into(),
1444 unique_values: unique.clone(),
1445 null_mask: Some(mask.clone()),
1446 };
1447 let (_, sorted_mask) = sort_categorical_lexical(&cat);
1448 assert_eq!(
1449 unpack_bitmask(&sorted_mask.unwrap()),
1450 vec64![false, false, false]
1451 );
1452 let mask2 = Bitmask::from_bools(&[true, true, true]);
1454 let cat2 = CategoricalArray {
1455 data: data.clone().into(),
1456 unique_values: unique,
1457 null_mask: Some(mask2.clone()),
1458 };
1459 let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
1460 assert_eq!(
1461 unpack_bitmask(&sorted_mask2.unwrap()),
1462 vec64![true, true, true]
1463 );
1464 }
1465 #[test]
1466 fn test_sort_boolean_array_with_nulls() {
1467 let mut arr = BooleanArray {
1468 data: pack_bitmask(&[false, true, false, true, true, false]),
1469 null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
1470 len: 6,
1471 _phantom: std::marker::PhantomData,
1472 };
1473 sort_boolean_array(&mut arr);
1474 let bits = unpack_bitmask(&arr.data);
1476 let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
1477 assert_eq!(nulls, vec64![false, false, true, true, true, true]);
1478 assert_eq!(&bits[2..], [false, false, false, true]);
1480 }
1481
1482 #[test]
1483 fn test_sort_boolean_array_all_true_and_all_false() {
1484 let mut arr = BooleanArray {
1485 data: pack_bitmask(&[true, true, true]),
1486 null_mask: None,
1487 len: 3,
1488 _phantom: std::marker::PhantomData,
1489 };
1490 sort_boolean_array(&mut arr);
1491 assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
1492
1493 let mut arr2 = BooleanArray {
1494 data: pack_bitmask(&[false, false, false]),
1495 null_mask: None,
1496 len: 3,
1497 _phantom: std::marker::PhantomData,
1498 };
1499 sort_boolean_array(&mut arr2);
1500 assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
1501 }
1502
1503 #[test]
1504 fn test_sort_boolean_array_all_nulls_and_none() {
1505 let mut arr = BooleanArray {
1506 data: pack_bitmask(&[true, false, true]),
1507 null_mask: Some(pack_bitmask(&[false, false, false])),
1508 len: 3,
1509 _phantom: std::marker::PhantomData,
1510 };
1511 sort_boolean_array(&mut arr);
1512 assert_eq!(
1513 unpack_bitmask(arr.null_mask.as_ref().unwrap()),
1514 vec64![false, false, false]
1515 );
1516 }
1517
1518 #[test]
1519 fn test_sort_slice_with_mask_basic() {
1520 let data = vec64![3, 1, 2, 1];
1521 let mask = pack_bitmask(&[true, false, true, true]);
1522 let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
1523 assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
1524 assert_eq!(
1525 unpack_bitmask(&mask_out.unwrap()),
1526 vec64![false, true, true, true]
1527 );
1528 }
1529
1530 #[test]
1531 fn test_sort_slice_with_mask_no_mask() {
1532 let data = vec64![3, 2, 1, 1, 0];
1533 let (sorted, mask_out) = sort_slice_with_mask(&data, None);
1534 assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
1535 assert!(mask_out.is_none());
1536 }
1537
1538 #[test]
1539 fn test_sort_slice_with_mask_all_true_and_all_false() {
1540 let data = vec64![3, 2, 1, 0];
1541 let mask_true = pack_bitmask(&[true; 4]);
1542 let mask_false = pack_bitmask(&[false; 4]);
1543 let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
1544 let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
1545 assert_eq!(
1546 unpack_bitmask(&mask_true_out.unwrap()),
1547 vec64![true, true, true, true]
1548 );
1549 assert_eq!(
1550 unpack_bitmask(&mask_false_out.unwrap()),
1551 vec64![false, false, false, false]
1552 );
1553 }
1554
1555 #[test]
1556 fn test_sort_int_with_duplicates_and_negatives() {
1557 let mut arr = vec64![-10, -1, 5, 0, 5, -10];
1558 sort_int(&mut arr);
1559 assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
1560 }
1561
1562 #[test]
1563 fn test_sort_str_empty_and_duplicate() {
1564 let mut arr = vec64!["", "a", "b", "a", ""];
1565 sort_str(&mut arr);
1566 assert_eq!(*arr, ["", "", "a", "a", "b"]);
1567 }
1568
1569 #[test]
1574 fn test_argsort_basic() {
1575 let data = [5i32, 2, 8, 1, 9];
1576 let indices = argsort(&data, false);
1577 assert_eq!(indices, vec![3, 1, 0, 2, 4]); let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1580 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1581 }
1582
1583 #[test]
1584 fn test_argsort_descending() {
1585 let data = [5i32, 2, 8, 1, 9];
1586 let indices = argsort(&data, true);
1587 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1588 assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
1589 }
1590
1591 #[test]
1592 fn test_argsort_empty() {
1593 let data: [i32; 0] = [];
1594 let indices = argsort(&data, false);
1595 assert!(indices.is_empty());
1596 }
1597
1598 #[test]
1599 fn test_argsort_float_basic() {
1600 let data = [3.0f64, 1.0, 4.0, 1.5, 2.0];
1601 let indices = argsort_float(&data, false);
1602 let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
1603 assert_eq!(sorted, vec![1.0, 1.5, 2.0, 3.0, 4.0]);
1604 }
1605
1606 #[test]
1607 fn test_argsort_float_with_nan() {
1608 let data = [3.0f64, f64::NAN, 1.0, f64::NEG_INFINITY];
1609 let indices = argsort_float(&data, false);
1610 let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
1611 assert_eq!(sorted[0], f64::NEG_INFINITY);
1613 assert_eq!(sorted[1], 1.0);
1614 assert_eq!(sorted[2], 3.0);
1615 assert!(sorted[3].is_nan());
1616 }
1617
1618 #[test]
1619 fn test_argsort_str_basic() {
1620 let data = ["cherry", "apple", "banana"];
1621 let indices = argsort_str(&data, false);
1622 let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
1623 assert_eq!(sorted, vec!["apple", "banana", "cherry"]);
1624 }
1625
1626 #[test]
1627 fn test_argsort_str_descending() {
1628 let data = ["cherry", "apple", "banana"];
1629 let indices = argsort_str(&data, true);
1630 let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
1631 assert_eq!(sorted, vec!["cherry", "banana", "apple"]);
1632 }
1633
1634 #[test]
1635 fn test_argsort_string_array_basic() {
1636 let values = "applecherrybanana";
1638 let offsets = [0usize, 5, 11, 17];
1639 let indices = argsort_string_array(&offsets, values, false);
1640 assert_eq!(indices, vec![0, 2, 1]);
1642 }
1643
1644 #[test]
1645 fn test_argsort_radix_u32_basic() {
1646 let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
1647 let indices = argsort_radix_u32(&data, false);
1648 let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
1649 assert_eq!(sorted, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
1650 }
1651
1652 #[test]
1653 fn test_argsort_radix_u32_descending() {
1654 let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
1655 let indices = argsort_radix_u32(&data, true);
1656 let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
1657 assert_eq!(sorted, vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
1658 }
1659
1660 #[test]
1661 fn test_argsort_radix_i32_with_negatives() {
1662 let data = vec![5i32, -2, 8, -1, 9, 3, -7, 4, 6, 0];
1663 let indices = argsort_radix_i32(&data, false);
1664 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1665 assert_eq!(sorted, vec![-7, -2, -1, 0, 3, 4, 5, 6, 8, 9]);
1666 }
1667
1668 #[test]
1669 fn test_argsort_radix_u64_basic() {
1670 let data = vec![100u64, 50, 200, 25];
1671 let indices = argsort_radix_u64(&data, false);
1672 let sorted: Vec<u64> = indices.iter().map(|&i| data[i]).collect();
1673 assert_eq!(sorted, vec![25, 50, 100, 200]);
1674 }
1675
1676 #[test]
1677 fn test_argsort_radix_i64_with_negatives() {
1678 let data = vec![5i64, -2, 8, -1, 0];
1679 let indices = argsort_radix_i64(&data, false);
1680 let sorted: Vec<i64> = indices.iter().map(|&i| data[i]).collect();
1681 assert_eq!(sorted, vec![-2, -1, 0, 5, 8]);
1682 }
1683
1684 #[test]
1685 fn test_argsort_boolean_basic() {
1686 use minarrow::Bitmask;
1687 let mut data = Bitmask::new_set_all(4, false);
1689 data.set(0, true);
1690 data.set(2, true);
1691
1692 let indices = argsort_boolean(&data, None, false);
1693 let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
1695 assert_eq!(sorted, vec![false, false, true, true]);
1696 }
1697
1698 #[test]
1699 fn test_argsort_boolean_descending() {
1700 use minarrow::Bitmask;
1701 let mut data = Bitmask::new_set_all(4, false);
1702 data.set(0, true);
1703 data.set(2, true);
1704
1705 let indices = argsort_boolean(&data, None, true);
1706 let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
1707 assert_eq!(sorted, vec![true, true, false, false]);
1708 }
1709
1710 #[test]
1711 fn test_argsort_boolean_with_nulls() {
1712 use minarrow::Bitmask;
1713 let mut data = Bitmask::new_set_all(4, false);
1716 data.set(0, true);
1717 data.set(2, true);
1718
1719 let mut null_mask = Bitmask::new_set_all(4, true);
1720 null_mask.set(1, false); let indices = argsort_boolean(&data, Some(&null_mask), false);
1723 assert_eq!(indices[0], 1); }
1726
1727 #[test]
1728 fn test_argsort_auto_i32_basic() {
1729 let data = [5i32, 2, 8, 1, 9];
1730 let config = ArgsortConfig::default();
1731 let indices = argsort_auto_i32(&data, &config);
1732 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1733 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1734 }
1735
1736 #[test]
1737 fn test_argsort_auto_with_comparison() {
1738 let data = [5i32, 2, 8, 1, 9];
1739 let config = ArgsortConfig::new().algorithm(SortAlgorithm::Comparison);
1740 let indices = argsort_auto_i32(&data, &config);
1741 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1742 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1743 }
1744
1745 #[test]
1746 fn test_argsort_auto_descending() {
1747 let data = [5i32, 2, 8, 1, 9];
1748 let config = ArgsortConfig::new().descending(true);
1749 let indices = argsort_auto_i32(&data, &config);
1750 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1751 assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
1752 }
1753
1754 fn apply_indices<T: Copy>(data: &[T], indices: &[usize]) -> Vec<T> {
1762 indices.iter().map(|&i| data[i]).collect()
1763 }
1764
1765 #[test]
1766 fn test_i32_sorts_produce_same_results() {
1767 let data = vec![
1769 5i32,
1770 -2,
1771 8,
1772 -1,
1773 9,
1774 3,
1775 -7,
1776 4,
1777 6,
1778 0,
1779 i32::MAX,
1780 i32::MIN,
1781 0,
1782 -100,
1783 100,
1784 1,
1785 1,
1786 1,
1787 -1,
1788 -1, ];
1790
1791 let comparison_asc = argsort(&data, false);
1793 let radix_asc = argsort_radix_i32(&data, false);
1794 let auto_comparison = argsort_auto_i32(
1795 &data,
1796 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1797 );
1798 let auto_radix =
1799 argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1800 let auto_default = argsort_auto_i32(&data, &ArgsortConfig::default());
1801
1802 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
1804 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
1805 let sorted_auto_comparison: Vec<i32> = apply_indices(&data, &auto_comparison);
1806 let sorted_auto_radix: Vec<i32> = apply_indices(&data, &auto_radix);
1807 let sorted_auto_default: Vec<i32> = apply_indices(&data, &auto_default);
1808
1809 assert_eq!(
1811 sorted_comparison, sorted_radix,
1812 "comparison vs radix mismatch"
1813 );
1814 assert_eq!(
1815 sorted_comparison, sorted_auto_comparison,
1816 "comparison vs auto_comparison mismatch"
1817 );
1818 assert_eq!(
1819 sorted_comparison, sorted_auto_radix,
1820 "comparison vs auto_radix mismatch"
1821 );
1822 assert_eq!(
1823 sorted_comparison, sorted_auto_default,
1824 "comparison vs auto_default mismatch"
1825 );
1826
1827 for i in 1..sorted_comparison.len() {
1829 assert!(
1830 sorted_comparison[i - 1] <= sorted_comparison[i],
1831 "not sorted at index {}",
1832 i
1833 );
1834 }
1835
1836 let comparison_desc = argsort(&data, true);
1838 let radix_desc = argsort_radix_i32(&data, true);
1839
1840 let sorted_comparison_desc: Vec<i32> = apply_indices(&data, &comparison_desc);
1841 let sorted_radix_desc: Vec<i32> = apply_indices(&data, &radix_desc);
1842
1843 assert_eq!(
1844 sorted_comparison_desc, sorted_radix_desc,
1845 "descending comparison vs radix mismatch"
1846 );
1847
1848 for i in 1..sorted_comparison_desc.len() {
1850 assert!(
1851 sorted_comparison_desc[i - 1] >= sorted_comparison_desc[i],
1852 "not sorted descending at index {}",
1853 i
1854 );
1855 }
1856 }
1857
1858 #[test]
1859 fn test_i64_sorts_produce_same_results() {
1860 let data = vec![
1861 5i64,
1862 -2,
1863 8,
1864 -1,
1865 9,
1866 3,
1867 -7,
1868 4,
1869 6,
1870 0,
1871 i64::MAX,
1872 i64::MIN,
1873 0,
1874 -100,
1875 100,
1876 i64::MAX - 1,
1877 i64::MIN + 1,
1878 ];
1879
1880 let comparison_asc = argsort(&data, false);
1881 let radix_asc = argsort_radix_i64(&data, false);
1882 let auto_comparison = argsort_auto_i64(
1883 &data,
1884 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1885 );
1886 let auto_radix =
1887 argsort_auto_i64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1888
1889 let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
1890 let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
1891 let sorted_auto_comparison: Vec<i64> = apply_indices(&data, &auto_comparison);
1892 let sorted_auto_radix: Vec<i64> = apply_indices(&data, &auto_radix);
1893
1894 assert_eq!(
1895 sorted_comparison, sorted_radix,
1896 "i64: comparison vs radix mismatch"
1897 );
1898 assert_eq!(
1899 sorted_comparison, sorted_auto_comparison,
1900 "i64: comparison vs auto_comparison mismatch"
1901 );
1902 assert_eq!(
1903 sorted_comparison, sorted_auto_radix,
1904 "i64: comparison vs auto_radix mismatch"
1905 );
1906
1907 for i in 1..sorted_comparison.len() {
1909 assert!(
1910 sorted_comparison[i - 1] <= sorted_comparison[i],
1911 "i64: not sorted at index {}",
1912 i
1913 );
1914 }
1915 }
1916
1917 #[test]
1918 fn test_u32_sorts_produce_same_results() {
1919 let data = vec![
1920 5u32,
1921 2,
1922 8,
1923 1,
1924 9,
1925 3,
1926 7,
1927 4,
1928 6,
1929 0,
1930 u32::MAX,
1931 0,
1932 100,
1933 u32::MAX - 1,
1934 1,
1935 1,
1936 1, ];
1938
1939 let comparison_asc = argsort(&data, false);
1940 let radix_asc = argsort_radix_u32(&data, false);
1941 let auto_comparison = argsort_auto_u32(
1942 &data,
1943 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1944 );
1945 let auto_radix =
1946 argsort_auto_u32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1947
1948 let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
1949 let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
1950 let sorted_auto_comparison: Vec<u32> = apply_indices(&data, &auto_comparison);
1951 let sorted_auto_radix: Vec<u32> = apply_indices(&data, &auto_radix);
1952
1953 assert_eq!(
1954 sorted_comparison, sorted_radix,
1955 "u32: comparison vs radix mismatch"
1956 );
1957 assert_eq!(
1958 sorted_comparison, sorted_auto_comparison,
1959 "u32: comparison vs auto_comparison mismatch"
1960 );
1961 assert_eq!(
1962 sorted_comparison, sorted_auto_radix,
1963 "u32: comparison vs auto_radix mismatch"
1964 );
1965
1966 for i in 1..sorted_comparison.len() {
1968 assert!(
1969 sorted_comparison[i - 1] <= sorted_comparison[i],
1970 "u32: not sorted at index {}",
1971 i
1972 );
1973 }
1974 }
1975
1976 #[test]
1977 fn test_u64_sorts_produce_same_results() {
1978 let data = vec![
1979 5u64,
1980 2,
1981 8,
1982 1,
1983 9,
1984 3,
1985 7,
1986 4,
1987 6,
1988 0,
1989 u64::MAX,
1990 0,
1991 100,
1992 u64::MAX - 1,
1993 ];
1994
1995 let comparison_asc = argsort(&data, false);
1996 let radix_asc = argsort_radix_u64(&data, false);
1997 let auto_comparison = argsort_auto_u64(
1998 &data,
1999 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
2000 );
2001 let auto_radix =
2002 argsort_auto_u64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
2003
2004 let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
2005 let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
2006 let sorted_auto_comparison: Vec<u64> = apply_indices(&data, &auto_comparison);
2007 let sorted_auto_radix: Vec<u64> = apply_indices(&data, &auto_radix);
2008
2009 assert_eq!(
2010 sorted_comparison, sorted_radix,
2011 "u64: comparison vs radix mismatch"
2012 );
2013 assert_eq!(
2014 sorted_comparison, sorted_auto_comparison,
2015 "u64: comparison vs auto_comparison mismatch"
2016 );
2017 assert_eq!(
2018 sorted_comparison, sorted_auto_radix,
2019 "u64: comparison vs auto_radix mismatch"
2020 );
2021
2022 for i in 1..sorted_comparison.len() {
2024 assert!(
2025 sorted_comparison[i - 1] <= sorted_comparison[i],
2026 "u64: not sorted at index {}",
2027 i
2028 );
2029 }
2030 }
2031
2032 #[cfg(feature = "simd_sort")]
2033 #[test]
2034 fn test_i32_sorts_produce_same_results_with_simd() {
2035 use super::simd_argsort;
2036
2037 let data = vec![
2038 5i32,
2039 -2,
2040 8,
2041 -1,
2042 9,
2043 3,
2044 -7,
2045 4,
2046 6,
2047 0,
2048 i32::MAX,
2049 i32::MIN,
2050 0,
2051 -100,
2052 100,
2053 1,
2054 1,
2055 1,
2056 -1,
2057 -1,
2058 ];
2059
2060 let comparison_asc = argsort(&data, false);
2061 let radix_asc = argsort_radix_i32(&data, false);
2062 let simd_asc = simd_argsort::argsort_simd_i32(&data, false);
2063 let auto_simd =
2064 argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Simd));
2065
2066 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
2067 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
2068 let sorted_simd: Vec<i32> = apply_indices(&data, &simd_asc);
2069 let sorted_auto_simd: Vec<i32> = apply_indices(&data, &auto_simd);
2070
2071 assert_eq!(
2072 sorted_comparison, sorted_radix,
2073 "simd test: comparison vs radix mismatch"
2074 );
2075 assert_eq!(
2076 sorted_comparison, sorted_simd,
2077 "simd test: comparison vs simd mismatch"
2078 );
2079 assert_eq!(
2080 sorted_comparison, sorted_auto_simd,
2081 "simd test: comparison vs auto_simd mismatch"
2082 );
2083 }
2084
2085 #[cfg(feature = "simd_sort")]
2086 #[test]
2087 fn test_i64_sorts_produce_same_results_with_simd() {
2088 use super::simd_argsort;
2089
2090 let data = vec![
2091 5i64,
2092 -2,
2093 8,
2094 -1,
2095 9,
2096 3,
2097 -7,
2098 4,
2099 6,
2100 0,
2101 i64::MAX,
2102 i64::MIN,
2103 0,
2104 -100,
2105 100,
2106 ];
2107
2108 let comparison_asc = argsort(&data, false);
2109 let radix_asc = argsort_radix_i64(&data, false);
2110 let simd_asc = simd_argsort::argsort_simd_i64(&data, false);
2111
2112 let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
2113 let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
2114 let sorted_simd: Vec<i64> = apply_indices(&data, &simd_asc);
2115
2116 assert_eq!(
2117 sorted_comparison, sorted_radix,
2118 "simd i64: comparison vs radix mismatch"
2119 );
2120 assert_eq!(
2121 sorted_comparison, sorted_simd,
2122 "simd i64: comparison vs simd mismatch"
2123 );
2124 }
2125
2126 #[cfg(feature = "simd_sort")]
2127 #[test]
2128 fn test_u32_sorts_produce_same_results_with_simd() {
2129 use super::simd_argsort;
2130
2131 let data = vec![
2132 5u32,
2133 2,
2134 8,
2135 1,
2136 9,
2137 3,
2138 7,
2139 4,
2140 6,
2141 0,
2142 u32::MAX,
2143 0,
2144 100,
2145 u32::MAX - 1,
2146 ];
2147
2148 let comparison_asc = argsort(&data, false);
2149 let radix_asc = argsort_radix_u32(&data, false);
2150 let simd_asc = simd_argsort::argsort_simd_u32(&data, false);
2151
2152 let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
2153 let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
2154 let sorted_simd: Vec<u32> = apply_indices(&data, &simd_asc);
2155
2156 assert_eq!(
2157 sorted_comparison, sorted_radix,
2158 "simd u32: comparison vs radix mismatch"
2159 );
2160 assert_eq!(
2161 sorted_comparison, sorted_simd,
2162 "simd u32: comparison vs simd mismatch"
2163 );
2164 }
2165
2166 #[cfg(feature = "simd_sort")]
2167 #[test]
2168 fn test_u64_sorts_produce_same_results_with_simd() {
2169 use super::simd_argsort;
2170
2171 let data = vec![
2172 5u64,
2173 2,
2174 8,
2175 1,
2176 9,
2177 3,
2178 7,
2179 4,
2180 6,
2181 0,
2182 u64::MAX,
2183 0,
2184 100,
2185 u64::MAX - 1,
2186 ];
2187
2188 let comparison_asc = argsort(&data, false);
2189 let radix_asc = argsort_radix_u64(&data, false);
2190 let simd_asc = simd_argsort::argsort_simd_u64(&data, false);
2191
2192 let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
2193 let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
2194 let sorted_simd: Vec<u64> = apply_indices(&data, &simd_asc);
2195
2196 assert_eq!(
2197 sorted_comparison, sorted_radix,
2198 "simd u64: comparison vs radix mismatch"
2199 );
2200 assert_eq!(
2201 sorted_comparison, sorted_simd,
2202 "simd u64: comparison vs simd mismatch"
2203 );
2204 }
2205
2206 #[test]
2208 fn test_numeric_sorts_produce_same_results_large_dataset() {
2209 fn lcg_next(state: &mut u64) -> i32 {
2211 *state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
2212 (*state >> 33) as i32
2213 }
2214
2215 let mut state = 12345u64;
2216 let data: Vec<i32> = (0..10_000).map(|_| lcg_next(&mut state)).collect();
2217
2218 let comparison_asc = argsort(&data, false);
2219 let radix_asc = argsort_radix_i32(&data, false);
2220
2221 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
2222 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
2223
2224 assert_eq!(
2225 sorted_comparison, sorted_radix,
2226 "large dataset: comparison vs radix mismatch"
2227 );
2228
2229 for i in 1..sorted_comparison.len() {
2231 assert!(
2232 sorted_comparison[i - 1] <= sorted_comparison[i],
2233 "large dataset: not sorted at index {}",
2234 i
2235 );
2236 }
2237 }
2238
2239 #[test]
2244 fn test_stable_sort_preserves_relative_order() {
2245 let data = [3i32, 1, 2, 1];
2247 let indices = argsort_with_stability(&data, false, true);
2248 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
2254
2255 #[test]
2256 fn test_stable_float_preserves_relative_order() {
2257 let data = [3.0f64, 1.0, 2.0, 1.0];
2259 let indices = argsort_float_with_stability(&data, false, true);
2260 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
2265
2266 #[test]
2267 fn test_argsort_config_stable_default_true() {
2268 let config = ArgsortConfig::default();
2269 assert!(config.stable);
2270 }
2271
2272 #[test]
2273 fn test_argsort_auto_stable_preserves_order() {
2274 let data = [3i32, 1, 2, 1];
2275 let config = ArgsortConfig::new(); let indices = argsort_auto_i32(&data, &config);
2277 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); }
2281
2282 #[test]
2283 fn test_argsort_auto_unstable_still_sorts_correctly() {
2284 let data = [5i32, 2, 8, 1, 9];
2285 let config = ArgsortConfig::new().stable(false);
2286 let indices = argsort_auto_i32(&data, &config);
2287 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
2288 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
2289 }
2290 }
2291}