1use std::cmp::Ordering;
15
16use minarrow::{Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64};
17use num_traits::{Float, NumCast, Zero};
18
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
21pub enum SortAlgorithm {
22 #[default]
28 Auto,
29 Comparison,
34 Radix,
39 #[cfg(feature = "simd_sort")]
44 Simd,
45}
46
47#[derive(Debug, Clone)]
49pub struct ArgsortConfig {
50 pub descending: bool,
51 pub algorithm: SortAlgorithm,
52 pub stable: bool,
54 pub parallel: bool,
56}
57
58impl Default for ArgsortConfig {
59 fn default() -> Self {
60 Self {
61 descending: false,
62 algorithm: SortAlgorithm::default(),
63 stable: true,
64 parallel: false,
65 }
66 }
67}
68
69impl ArgsortConfig {
70 pub fn new() -> Self {
72 Self::default()
73 }
74
75 pub fn descending(mut self, descending: bool) -> Self {
77 self.descending = descending;
78 self
79 }
80
81 pub fn algorithm(mut self, algorithm: SortAlgorithm) -> Self {
83 self.algorithm = algorithm;
84 self
85 }
86
87 pub fn stable(mut self, stable: bool) -> Self {
89 self.stable = stable;
90 self
91 }
92
93 pub fn parallel(mut self, parallel: bool) -> Self {
95 self.parallel = parallel;
96 self
97 }
98}
99
100#[inline]
107pub fn sort_float<T: Float>(slice: &mut [T]) {
108 slice.sort_unstable_by(total_cmp_f);
109}
110#[inline(always)]
115pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
116 if std::mem::size_of::<T>() == 4 {
118 let ua = unsafe { *(a as *const T as *const u32) };
120 let ub = unsafe { *(b as *const T as *const u32) };
121 let ka = if ua & 0x8000_0000 != 0 {
123 !ua
124 } else {
125 ua ^ 0x8000_0000
126 };
127 let kb = if ub & 0x8000_0000 != 0 {
128 !ub
129 } else {
130 ub ^ 0x8000_0000
131 };
132 ka.cmp(&kb)
133 } else if std::mem::size_of::<T>() == 8 {
134 let ua = unsafe { *(a as *const T as *const u64) };
136 let ub = unsafe { *(b as *const T as *const u64) };
137 let ka = if ua & 0x8000_0000_0000_0000 != 0 {
138 !ua
139 } else {
140 ua ^ 0x8000_0000_0000_0000
141 };
142 let kb = if ub & 0x8000_0000_0000_0000 != 0 {
143 !ub
144 } else {
145 ub ^ 0x8000_0000_0000_0000
146 };
147 ka.cmp(&kb)
148 } else {
149 unreachable!("Only f32 and f64 are valid Float types.")
150 }
151}
152
153#[inline]
157pub fn sorted_float_to<T: Float>(data: &[T], out: &mut [T]) {
158 assert_eq!(
159 data.len(),
160 out.len(),
161 "sorted_float_to: input/output length mismatch"
162 );
163 out.copy_from_slice(data);
164 sort_float(out);
165}
166
167pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
168 let mut v = Vec64::from_slice(data);
169 sort_float(&mut v);
170 v
171}
172
173#[inline]
193pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
194 slice.sort_unstable();
195}
196
197#[inline]
224pub fn sorted_int_to<T: Ord + Copy>(data: &[T], out: &mut [T]) {
225 assert_eq!(
226 data.len(),
227 out.len(),
228 "sorted_int_to: input/output length mismatch"
229 );
230 out.copy_from_slice(data);
231 sort_int(out);
232}
233
234pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
235 let mut v = Vec64::from_slice(data);
236 sort_int(&mut v);
237 v
238}
239
240#[inline]
257pub fn sort_str(slice: &mut [&str]) {
258 slice.sort_unstable();
259}
260
261pub fn sorted_str(data: &[&str]) -> Vec64<String> {
298 let mut v = Vec64::from_slice(data);
299 sort_str(&mut v);
300 v.iter().map(|s| s.to_string()).collect()
301}
302
303pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
306 let n = offsets.len() - 1;
307 let mut indices: Vec<usize> = (0..n).collect();
308
309 indices.sort_unstable_by(|&i, &j| {
310 let a = &values[offsets[i]..offsets[i + 1]];
311 let b = &values[offsets[j]..offsets[j + 1]];
312 a.cmp(b)
313 });
314
315 let total_bytes: usize = indices
317 .iter()
318 .map(|&idx| offsets[idx + 1] - offsets[idx])
319 .sum();
320
321 let mut new_values = String::with_capacity(total_bytes);
323 let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
324
325 unsafe {
327 new_offsets.set_len(n + 1);
328 }
329 unsafe {
330 new_values.as_mut_vec().set_len(total_bytes);
331 }
332
333 let values_bytes = values.as_bytes();
334 let out_bytes = unsafe { new_values.as_mut_vec() };
335
336 unsafe {
338 *new_offsets.get_unchecked_mut(0) = 0;
339 }
340 let mut current_offset = 0;
341 let mut out_ptr = 0;
342 for (i, &idx) in indices.iter().enumerate() {
343 let start = offsets[idx];
344 let end = offsets[idx + 1];
345 let len = end - start;
346 unsafe {
348 std::ptr::copy_nonoverlapping(
349 values_bytes.as_ptr().add(start),
350 out_bytes.as_mut_ptr().add(out_ptr),
351 len,
352 );
353 current_offset += len;
354 *new_offsets.get_unchecked_mut(i + 1) = current_offset;
355 }
356 out_ptr += len;
357 }
358 unsafe {
360 new_values.as_mut_vec().set_len(current_offset);
361 }
362
363 (new_offsets, new_values)
364}
365
366pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
369 cat: &CategoricalArray<T>,
370) -> (Vec64<T>, Option<Bitmask>) {
371 let len = cat.data.len();
372 let mut indices: Vec<usize> = (0..len).collect();
373
374 indices.sort_by(|&i, &j| {
376 let a_is_null = cat.is_null(i);
377 let b_is_null = cat.is_null(j);
378 match (a_is_null, b_is_null) {
379 (true, true) => Ordering::Equal,
380 (true, false) => Ordering::Less,
381 (false, true) => Ordering::Greater,
382 (false, false) => {
383 let a_key = &cat.unique_values[cat.data[i].to_usize()];
385 let b_key = &cat.unique_values[cat.data[j].to_usize()];
386 a_key.cmp(b_key)
387 }
388 }
389 });
390
391 let mut sorted_data = Vec64::<T>::with_capacity(len);
393 let mut sorted_mask = cat
394 .null_mask
395 .as_ref()
396 .map(|_| Bitmask::new_set_all(len, false));
397
398 for (out_i, &orig_i) in indices.iter().enumerate() {
399 sorted_data.push(cat.data[orig_i]);
400 if let Some(ref mask) = cat.null_mask {
401 if let Some(ref mut sm) = sorted_mask {
402 if unsafe { mask.get_unchecked(orig_i) } {
403 unsafe { sm.set_unchecked(out_i, true) };
404 }
405 }
406 }
407 }
408
409 (sorted_data, sorted_mask)
410}
411
412#[inline]
414fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
415 let mut out = Vec64::with_capacity(mask.len);
416 for i in 0..mask.len {
417 out.push(unsafe { mask.get_unchecked(i) });
418 }
419 out
420}
421
422#[inline]
424fn pack_bitmask(bits: &[bool]) -> Bitmask {
425 let mut mask = Bitmask::new_set_all(bits.len(), false);
426 for (i, &b) in bits.iter().enumerate() {
427 if b {
428 unsafe { mask.set_unchecked(i, true) };
429 }
430 }
431 mask
432}
433
434pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
437 let bits: Vec64<bool> = unpack_bitmask(&arr.data);
438 let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
439
440 let mut indices: Vec<usize> = (0..arr.len).collect();
441
442 indices.sort_unstable_by(|&i, &j| {
444 let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
445 let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
446
447 match (a_null, b_null) {
448 (true, true) => Ordering::Equal, (true, false) => Ordering::Less, (false, true) => Ordering::Greater, (false, false) => {
452 match (bits[i], bits[j]) {
454 (false, false) => Ordering::Equal,
455 (false, true) => Ordering::Less,
456 (true, false) => Ordering::Greater,
457 (true, true) => Ordering::Equal,
458 }
459 }
460 }
461 });
462
463 let sorted_bits: Vec<bool> = indices
465 .iter()
466 .map(|&idx| {
467 let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
468 if is_null { false } else { bits[idx] }
469 })
470 .collect();
471 arr.data = pack_bitmask(&sorted_bits);
472
473 if let Some(null_mask) = arr.null_mask.as_mut() {
475 let sorted_nulls: Vec<bool> = indices
476 .iter()
477 .map(|&idx| nulls.as_ref().unwrap()[idx])
478 .collect();
479 *null_mask = pack_bitmask(&sorted_nulls);
480 }
481}
482
483pub fn sort_slice_with_mask<T: Ord + Copy>(
485 data: &[T],
486 mask: Option<&Bitmask>,
487) -> (Vec64<T>, Option<Bitmask>) {
488 let n = data.len();
489 let mut indices: Vec<usize> = (0..n).collect();
490 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
491
492 let mut sorted_data = Vec64::<T>::with_capacity(n);
493 unsafe {
494 sorted_data.set_len(n);
495 }
496 for (i, &idx) in indices.iter().enumerate() {
497 unsafe {
498 *sorted_data.get_unchecked_mut(i) = data[idx];
499 }
500 }
501
502 let sorted_mask = mask.map(|m| {
503 let mut out = Bitmask::new_set_all(n, false);
504 for (i, &idx) in indices.iter().enumerate() {
505 if unsafe { m.get_unchecked(idx) } {
506 unsafe { out.set_unchecked(i, true) };
507 }
508 }
509 out
510 });
511
512 (sorted_data, sorted_mask)
513}
514
515#[inline]
519pub fn argsort<T: Ord>(data: &[T], descending: bool) -> Vec<usize> {
520 argsort_with_stability(data, descending, false)
521}
522
523#[inline]
525pub fn argsort_with_stability<T: Ord>(data: &[T], descending: bool, stable: bool) -> Vec<usize> {
526 let n = data.len();
527 if n == 0 {
528 return vec![];
529 }
530
531 let mut indices: Vec<usize> = (0..n).collect();
532 if stable {
533 if descending {
534 indices.sort_by(|&i, &j| data[j].cmp(&data[i]));
535 } else {
536 indices.sort_by(|&i, &j| data[i].cmp(&data[j]));
537 }
538 } else {
539 if descending {
540 indices.sort_unstable_by(|&i, &j| data[j].cmp(&data[i]));
541 } else {
542 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
543 }
544 }
545 indices
546}
547
548#[inline]
550pub fn argsort_float<T: Float>(data: &[T], descending: bool) -> Vec<usize> {
551 argsort_float_with_stability(data, descending, false)
552}
553
554#[inline]
556pub fn argsort_float_with_stability<T: Float>(data: &[T], descending: bool, stable: bool) -> Vec<usize> {
557 let n = data.len();
558 if n == 0 {
559 return vec![];
560 }
561
562 let mut indices: Vec<usize> = (0..n).collect();
563 if stable {
564 if descending {
565 indices.sort_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
566 } else {
567 indices.sort_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
568 }
569 } else {
570 if descending {
571 indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
572 } else {
573 indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
574 }
575 }
576 indices
577}
578
579#[inline]
581pub fn argsort_str(data: &[&str], descending: bool) -> Vec<usize> {
582 argsort_str_with_stability(data, descending, false)
583}
584
585#[inline]
587pub fn argsort_str_with_stability(data: &[&str], descending: bool, stable: bool) -> Vec<usize> {
588 let n = data.len();
589 if n == 0 {
590 return vec![];
591 }
592
593 let mut indices: Vec<usize> = (0..n).collect();
594 if stable {
595 if descending {
596 indices.sort_by(|&i, &j| data[j].cmp(data[i]));
597 } else {
598 indices.sort_by(|&i, &j| data[i].cmp(data[j]));
599 }
600 } else {
601 if descending {
602 indices.sort_unstable_by(|&i, &j| data[j].cmp(data[i]));
603 } else {
604 indices.sort_unstable_by(|&i, &j| data[i].cmp(data[j]));
605 }
606 }
607 indices
608}
609
610pub fn argsort_string_array(offsets: &[usize], values: &str, descending: bool) -> Vec<usize> {
614 argsort_string_array_with_stability(offsets, values, descending, false)
615}
616
617pub fn argsort_string_array_with_stability(offsets: &[usize], values: &str, descending: bool, stable: bool) -> Vec<usize> {
619 let n = if offsets.is_empty() {
620 0
621 } else {
622 offsets.len() - 1
623 };
624 if n == 0 {
625 return vec![];
626 }
627
628 let mut indices: Vec<usize> = (0..n).collect();
629 let cmp = |i: &usize, j: &usize| {
630 let a = &values[offsets[*i]..offsets[*i + 1]];
631 let b = &values[offsets[*j]..offsets[*j + 1]];
632 if descending { b.cmp(a) } else { a.cmp(b) }
633 };
634 if stable {
635 indices.sort_by(cmp);
636 } else {
637 indices.sort_unstable_by(cmp);
638 }
639 indices
640}
641
642pub fn argsort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
647 cat: &CategoricalArray<T>,
648 descending: bool,
649) -> Vec<usize> {
650 let len = cat.data.len();
651 if len == 0 {
652 return vec![];
653 }
654
655 let mut indices: Vec<usize> = (0..len).collect();
656
657 indices.sort_by(|&i, &j| {
658 let a_is_null = cat.is_null(i);
659 let b_is_null = cat.is_null(j);
660
661 let ordering = match (a_is_null, b_is_null) {
662 (true, true) => Ordering::Equal,
663 (true, false) => Ordering::Less, (false, true) => Ordering::Greater,
665 (false, false) => {
666 let a_key = &cat.unique_values[cat.data[i].to_usize()];
667 let b_key = &cat.unique_values[cat.data[j].to_usize()];
668 a_key.cmp(b_key)
669 }
670 };
671
672 if descending {
673 ordering.reverse()
674 } else {
675 ordering
676 }
677 });
678
679 indices
680}
681
682pub fn argsort_categorical_custom<T: Ord + Copy + Zero + NumCast + Integer>(
688 cat: &CategoricalArray<T>,
689 category_order: &[&str],
690 descending: bool,
691) -> Vec<usize> {
692 let len = cat.data.len();
693 if len == 0 {
694 return vec![];
695 }
696
697 use std::collections::HashMap;
699 let order_map: HashMap<&str, usize> = category_order
700 .iter()
701 .enumerate()
702 .map(|(i, &s)| (s, i))
703 .collect();
704
705 let mut indices: Vec<usize> = (0..len).collect();
706
707 indices.sort_by(|&i, &j| {
708 let a_is_null = cat.is_null(i);
709 let b_is_null = cat.is_null(j);
710
711 let ordering = match (a_is_null, b_is_null) {
712 (true, true) => Ordering::Equal,
713 (true, false) => Ordering::Less,
714 (false, true) => Ordering::Greater,
715 (false, false) => {
716 let a_key = &cat.unique_values[cat.data[i].to_usize()];
717 let b_key = &cat.unique_values[cat.data[j].to_usize()];
718
719 let a_pos = order_map.get(a_key.as_str());
721 let b_pos = order_map.get(b_key.as_str());
722
723 match (a_pos, b_pos) {
724 (Some(ap), Some(bp)) => ap.cmp(bp),
725 (Some(_), None) => Ordering::Less, (None, Some(_)) => Ordering::Greater,
727 (None, None) => a_key.cmp(b_key), }
729 }
730 };
731
732 if descending {
733 ordering.reverse()
734 } else {
735 ordering
736 }
737 });
738
739 indices
740}
741
742pub fn argsort_boolean(
746 data: &Bitmask,
747 null_mask: Option<&Bitmask>,
748 descending: bool,
749) -> Vec<usize> {
750 let n = data.len;
751 if n == 0 {
752 return vec![];
753 }
754
755 let mut indices: Vec<usize> = (0..n).collect();
756
757 indices.sort_unstable_by(|&i, &j| {
758 let a_null = null_mask.map_or(false, |m| !m.get(i));
759 let b_null = null_mask.map_or(false, |m| !m.get(j));
760
761 let ordering = match (a_null, b_null) {
762 (true, true) => Ordering::Equal,
763 (true, false) => Ordering::Less,
764 (false, true) => Ordering::Greater,
765 (false, false) => {
766 let a_val = data.get(i);
767 let b_val = data.get(j);
768 a_val.cmp(&b_val)
769 }
770 };
771
772 if descending {
773 ordering.reverse()
774 } else {
775 ordering
776 }
777 });
778
779 indices
780}
781
782pub fn argsort_radix_u32(data: &[u32], descending: bool) -> Vec<usize> {
789 let n = data.len();
790 if n == 0 {
791 return vec![];
792 }
793
794 let mut indices: Vec<usize> = (0..n).collect();
795 let mut temp_indices = vec![0usize; n];
796
797 for shift in (0..32).step_by(8) {
799 let mut counts = [0usize; 256];
800 for &idx in &indices {
801 let byte = ((data[idx] >> shift) & 0xFF) as usize;
802 counts[byte] += 1;
803 }
804
805 let mut positions = [0usize; 256];
806 if descending {
807 let mut sum = 0;
808 for i in (0..256).rev() {
809 positions[i] = sum;
810 sum += counts[i];
811 }
812 } else {
813 let mut sum = 0;
814 for i in 0..256 {
815 positions[i] = sum;
816 sum += counts[i];
817 }
818 }
819
820 for &idx in &indices {
821 let byte = ((data[idx] >> shift) & 0xFF) as usize;
822 temp_indices[positions[byte]] = idx;
823 positions[byte] += 1;
824 }
825
826 std::mem::swap(&mut indices, &mut temp_indices);
827 }
828
829 indices
830}
831
832pub fn argsort_radix_u64(data: &[u64], descending: bool) -> Vec<usize> {
834 let n = data.len();
835 if n == 0 {
836 return vec![];
837 }
838
839 let mut indices: Vec<usize> = (0..n).collect();
840 let mut temp_indices = vec![0usize; n];
841
842 for shift in (0..64).step_by(8) {
843 let mut counts = [0usize; 256];
844 for &idx in &indices {
845 let byte = ((data[idx] >> shift) & 0xFF) as usize;
846 counts[byte] += 1;
847 }
848
849 let mut positions = [0usize; 256];
850 if descending {
851 let mut sum = 0;
852 for i in (0..256).rev() {
853 positions[i] = sum;
854 sum += counts[i];
855 }
856 } else {
857 let mut sum = 0;
858 for i in 0..256 {
859 positions[i] = sum;
860 sum += counts[i];
861 }
862 }
863
864 for &idx in &indices {
865 let byte = ((data[idx] >> shift) & 0xFF) as usize;
866 temp_indices[positions[byte]] = idx;
867 positions[byte] += 1;
868 }
869
870 std::mem::swap(&mut indices, &mut temp_indices);
871 }
872
873 indices
874}
875
876pub fn argsort_radix_i32(data: &[i32], descending: bool) -> Vec<usize> {
878 let unsigned: Vec<u32> = data.iter().map(|&x| (x as u32) ^ 0x8000_0000).collect();
880 argsort_radix_u32(&unsigned, descending)
881}
882
883pub fn argsort_radix_i64(data: &[i64], descending: bool) -> Vec<usize> {
885 let unsigned: Vec<u64> = data
886 .iter()
887 .map(|&x| (x as u64) ^ 0x8000_0000_0000_0000)
888 .collect();
889 argsort_radix_u64(&unsigned, descending)
890}
891
892#[cfg(feature = "simd_sort")]
895pub mod simd_argsort {
896 use std::cmp::Ordering;
897 use voracious_radix_sort::{RadixSort, Radixable};
898
899 #[derive(Copy, Clone, Debug)]
901 struct IndexedU32 {
902 value: u32,
903 index: usize,
904 }
905
906 impl PartialOrd for IndexedU32 {
907 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
908 self.value.partial_cmp(&other.value)
909 }
910 }
911
912 impl PartialEq for IndexedU32 {
913 fn eq(&self, other: &Self) -> bool {
914 self.value == other.value
915 }
916 }
917
918 impl Radixable<u32> for IndexedU32 {
919 type Key = u32;
920 #[inline]
921 fn key(&self) -> Self::Key {
922 self.value
923 }
924 }
925
926 #[derive(Copy, Clone, Debug)]
928 struct IndexedU64 {
929 value: u64,
930 index: usize,
931 }
932
933 impl PartialOrd for IndexedU64 {
934 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
935 self.value.partial_cmp(&other.value)
936 }
937 }
938
939 impl PartialEq for IndexedU64 {
940 fn eq(&self, other: &Self) -> bool {
941 self.value == other.value
942 }
943 }
944
945 impl Radixable<u64> for IndexedU64 {
946 type Key = u64;
947 #[inline]
948 fn key(&self) -> Self::Key {
949 self.value
950 }
951 }
952
953 #[derive(Copy, Clone, Debug)]
955 struct IndexedI32 {
956 value: i32,
957 index: usize,
958 }
959
960 impl PartialOrd for IndexedI32 {
961 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
962 self.value.partial_cmp(&other.value)
963 }
964 }
965
966 impl PartialEq for IndexedI32 {
967 fn eq(&self, other: &Self) -> bool {
968 self.value == other.value
969 }
970 }
971
972 impl Radixable<i32> for IndexedI32 {
973 type Key = i32;
974 #[inline]
975 fn key(&self) -> Self::Key {
976 self.value
977 }
978 }
979
980 #[derive(Copy, Clone, Debug)]
982 struct IndexedI64 {
983 value: i64,
984 index: usize,
985 }
986
987 impl PartialOrd for IndexedI64 {
988 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
989 self.value.partial_cmp(&other.value)
990 }
991 }
992
993 impl PartialEq for IndexedI64 {
994 fn eq(&self, other: &Self) -> bool {
995 self.value == other.value
996 }
997 }
998
999 impl Radixable<i64> for IndexedI64 {
1000 type Key = i64;
1001 #[inline]
1002 fn key(&self) -> Self::Key {
1003 self.value
1004 }
1005 }
1006
1007 pub fn argsort_simd_u32(data: &[u32], descending: bool) -> Vec<usize> {
1009 let n = data.len();
1010 if n == 0 {
1011 return vec![];
1012 }
1013
1014 let mut indexed: Vec<IndexedU32> = data
1015 .iter()
1016 .enumerate()
1017 .map(|(index, &value)| IndexedU32 { value, index })
1018 .collect();
1019
1020 indexed.voracious_sort();
1021
1022 if descending {
1023 indexed.iter().rev().map(|x| x.index).collect()
1024 } else {
1025 indexed.iter().map(|x| x.index).collect()
1026 }
1027 }
1028
1029 pub fn argsort_simd_u64(data: &[u64], descending: bool) -> Vec<usize> {
1031 let n = data.len();
1032 if n == 0 {
1033 return vec![];
1034 }
1035
1036 let mut indexed: Vec<IndexedU64> = data
1037 .iter()
1038 .enumerate()
1039 .map(|(index, &value)| IndexedU64 { value, index })
1040 .collect();
1041
1042 indexed.voracious_sort();
1043
1044 if descending {
1045 indexed.iter().rev().map(|x| x.index).collect()
1046 } else {
1047 indexed.iter().map(|x| x.index).collect()
1048 }
1049 }
1050
1051 pub fn argsort_simd_i32(data: &[i32], descending: bool) -> Vec<usize> {
1053 let n = data.len();
1054 if n == 0 {
1055 return vec![];
1056 }
1057
1058 let mut indexed: Vec<IndexedI32> = data
1059 .iter()
1060 .enumerate()
1061 .map(|(index, &value)| IndexedI32 { value, index })
1062 .collect();
1063
1064 indexed.voracious_sort();
1065
1066 if descending {
1067 indexed.iter().rev().map(|x| x.index).collect()
1068 } else {
1069 indexed.iter().map(|x| x.index).collect()
1070 }
1071 }
1072
1073 pub fn argsort_simd_i64(data: &[i64], descending: bool) -> Vec<usize> {
1075 let n = data.len();
1076 if n == 0 {
1077 return vec![];
1078 }
1079
1080 let mut indexed: Vec<IndexedI64> = data
1081 .iter()
1082 .enumerate()
1083 .map(|(index, &value)| IndexedI64 { value, index })
1084 .collect();
1085
1086 indexed.voracious_sort();
1087
1088 if descending {
1089 indexed.iter().rev().map(|x| x.index).collect()
1090 } else {
1091 indexed.iter().map(|x| x.index).collect()
1092 }
1093 }
1094}
1095
1096pub fn argsort_auto_i32(data: &[i32], config: &ArgsortConfig) -> Vec<usize> {
1104 match config.algorithm {
1105 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1106 SortAlgorithm::Radix => argsort_radix_i32(data, config.descending),
1107 #[cfg(feature = "simd_sort")]
1108 SortAlgorithm::Simd => simd_argsort::argsort_simd_i32(data, config.descending),
1109 SortAlgorithm::Auto => {
1110 if config.stable {
1111 return argsort_with_stability(data, config.descending, true);
1112 }
1113 #[cfg(feature = "simd_sort")]
1114 {
1115 simd_argsort::argsort_simd_i32(data, config.descending)
1116 }
1117 #[cfg(not(feature = "simd_sort"))]
1118 {
1119 argsort_radix_i32(data, config.descending)
1120 }
1121 }
1122 }
1123}
1124
1125pub fn argsort_auto_i64(data: &[i64], config: &ArgsortConfig) -> Vec<usize> {
1127 match config.algorithm {
1128 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1129 SortAlgorithm::Radix => argsort_radix_i64(data, config.descending),
1130 #[cfg(feature = "simd_sort")]
1131 SortAlgorithm::Simd => simd_argsort::argsort_simd_i64(data, config.descending),
1132 SortAlgorithm::Auto => {
1133 if config.stable {
1134 return argsort_with_stability(data, config.descending, true);
1135 }
1136 #[cfg(feature = "simd_sort")]
1137 {
1138 simd_argsort::argsort_simd_i64(data, config.descending)
1139 }
1140 #[cfg(not(feature = "simd_sort"))]
1141 {
1142 argsort_radix_i64(data, config.descending)
1143 }
1144 }
1145 }
1146}
1147
1148pub fn argsort_auto_u32(data: &[u32], config: &ArgsortConfig) -> Vec<usize> {
1150 match config.algorithm {
1151 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1152 SortAlgorithm::Radix => argsort_radix_u32(data, config.descending),
1153 #[cfg(feature = "simd_sort")]
1154 SortAlgorithm::Simd => simd_argsort::argsort_simd_u32(data, config.descending),
1155 SortAlgorithm::Auto => {
1156 if config.stable {
1157 return argsort_with_stability(data, config.descending, true);
1158 }
1159 #[cfg(feature = "simd_sort")]
1160 {
1161 simd_argsort::argsort_simd_u32(data, config.descending)
1162 }
1163 #[cfg(not(feature = "simd_sort"))]
1164 {
1165 argsort_radix_u32(data, config.descending)
1166 }
1167 }
1168 }
1169}
1170
1171pub fn argsort_auto_u64(data: &[u64], config: &ArgsortConfig) -> Vec<usize> {
1173 match config.algorithm {
1174 SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
1175 SortAlgorithm::Radix => argsort_radix_u64(data, config.descending),
1176 #[cfg(feature = "simd_sort")]
1177 SortAlgorithm::Simd => simd_argsort::argsort_simd_u64(data, config.descending),
1178 SortAlgorithm::Auto => {
1179 if config.stable {
1180 return argsort_with_stability(data, config.descending, true);
1181 }
1182 #[cfg(feature = "simd_sort")]
1183 {
1184 simd_argsort::argsort_simd_u64(data, config.descending)
1185 }
1186 #[cfg(not(feature = "simd_sort"))]
1187 {
1188 argsort_radix_u64(data, config.descending)
1189 }
1190 }
1191 }
1192}
1193
1194#[cfg(test)]
1195mod tests {
1196 use minarrow::vec64;
1197
1198 use super::*;
1199
1200 #[test]
1201 fn test_total_cmp_f32_ordering_basic() {
1202 let a = 1.0f32;
1203 let b = 2.0f32;
1204 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
1205 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
1206 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
1207 }
1208
1209 #[test]
1210 fn test_total_cmp_f64_ordering_basic() {
1211 let a = 1.0f64;
1212 let b = 2.0f64;
1213 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
1214 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
1215 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
1216 }
1217
1218 #[test]
1219 fn test_total_cmp_zero_and_negzero() {
1220 let pz = 0.0f32;
1221 let nz = -0.0f32;
1222 assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
1224 assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
1225 }
1226
1227 #[test]
1228 fn test_total_cmp_nan_ordering_f32() {
1229 let nan = f32::NAN;
1230 let pos_inf = f32::INFINITY;
1231 let neg_inf = f32::NEG_INFINITY;
1232 let one = 1.0f32;
1233
1234 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
1236 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
1237 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
1238 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
1240 }
1241
1242 #[test]
1243 fn test_total_cmp_nan_ordering_f64() {
1244 let nan = f64::NAN;
1245 let pos_inf = f64::INFINITY;
1246 let neg_inf = f64::NEG_INFINITY;
1247 let one = 1.0f64;
1248
1249 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
1250 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
1251 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
1252 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
1253 }
1254
1255 #[test]
1256 fn test_sort_float_f32_all_edge_cases() {
1257 let mut v = vec64![
1258 3.0f32,
1259 -0.0,
1260 0.0,
1261 f32::INFINITY,
1262 f32::NEG_INFINITY,
1263 1.0,
1264 -1.0,
1265 f32::NAN,
1266 2.0,
1267 -2.0,
1268 ];
1269 sort_float(&mut v);
1270 assert_eq!(v[0], f32::NEG_INFINITY);
1273 assert_eq!(v[1], -2.0);
1274 assert_eq!(v[2], -1.0);
1275 assert_eq!(v[3], -0.0);
1276 assert_eq!(v[4], 0.0);
1277 assert_eq!(v[5], 1.0);
1278 assert_eq!(v[6], 2.0);
1279 assert_eq!(v[7], 3.0);
1280 assert_eq!(v[8], f32::INFINITY);
1281 assert!(v[9].is_nan());
1282 }
1283
1284 #[test]
1285 fn test_sort_float_f64_all_edge_cases() {
1286 let mut v = vec64![
1287 3.0f64,
1288 -0.0,
1289 0.0,
1290 f64::INFINITY,
1291 f64::NEG_INFINITY,
1292 1.0,
1293 -1.0,
1294 f64::NAN,
1295 2.0,
1296 -2.0,
1297 ];
1298 sort_float(&mut v);
1299 assert_eq!(v[0], f64::NEG_INFINITY);
1300 assert_eq!(v[1], -2.0);
1301 assert_eq!(v[2], -1.0);
1302 assert_eq!(v[3], -0.0);
1303 assert_eq!(v[4], 0.0);
1304 assert_eq!(v[5], 1.0);
1305 assert_eq!(v[6], 2.0);
1306 assert_eq!(v[7], 3.0);
1307 assert_eq!(v[8], f64::INFINITY);
1308 assert!(v[9].is_nan());
1309 }
1310
1311 #[test]
1312 fn test_sorted_float_immutability_and_return_type() {
1313 let v = vec64![1.0f32, 2.0, 3.0];
1314 let out = sorted_float(&v);
1315 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
1316 assert_eq!(*v, [1.0, 2.0, 3.0]);
1318 }
1319
1320 #[test]
1321 fn test_sorted_float_correct_for_f64() {
1322 let v = vec64![3.0f64, 2.0, 1.0];
1323 let out = sorted_float(&v);
1324 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
1325 }
1326
1327 #[test]
1328 fn test_sort_float_empty_and_single() {
1329 let mut v: [f32; 0] = [];
1330 sort_float(&mut v);
1331 let mut v2 = [42.0f32];
1332 sort_float(&mut v2);
1333 assert_eq!(v2, [42.0]);
1334 }
1335
1336 #[cfg(test)]
1337 mod tests {
1338 use super::*;
1339 use minarrow::{Vec64, vec64};
1340
1341 #[test]
1342 fn test_sort_int_empty_and_single() {
1343 let mut arr: [i32; 0] = [];
1344 sort_int(&mut arr);
1345 assert_eq!(arr, [] as [i32; 0]);
1346 let mut arr2 = vec64![42];
1347 sort_int(&mut arr2);
1348 assert_eq!(*arr2, [42]);
1349 }
1350
1351 #[test]
1352 fn test_sort_int_order() {
1353 let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
1354 sort_int(&mut arr);
1355 assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
1356 }
1357
1358 #[test]
1359 fn test_sorted_int_immutability_and_output() {
1360 let arr = vec64![5, 3, 7, 1, 2];
1361 let sorted = sorted_int(&arr);
1362 assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
1363 assert_eq!(*arr, [5, 3, 7, 1, 2]);
1365 }
1366
1367 #[test]
1368 fn test_sort_str_basic() {
1369 let mut arr = vec64!["z", "b", "a", "d"];
1370 sort_str(&mut arr);
1371 assert_eq!(*arr, ["a", "b", "d", "z"]);
1372 }
1373
1374 #[test]
1375 fn test_sorted_str_and_non_ascii() {
1376 let arr = vec64!["z", "ä", "b", "a", "d"];
1377 let sorted = sorted_str(&arr);
1378 assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
1381 }
1382
1383 #[test]
1384 fn test_sort_string_array_basic() {
1385 let offsets = vec![0, 1, 3, 5, 5, 6]; let values = "abcde".to_string() + "f";
1387 let (new_offsets, new_values) = sort_string_array(&offsets, &values);
1388 let slices: Vec<_> = new_offsets
1390 .windows(2)
1391 .map(|w| &new_values[w[0]..w[1]])
1392 .collect();
1393 assert_eq!(slices, &["", "a", "bc", "de", "f"]);
1394 }
1395
1396 #[test]
1397 fn test_sort_categorical_lexical_basic_and_nulls() {
1398 let unique = Vec64::from_slice_clone(&[
1400 "apple".to_string(),
1401 "banana".to_string(),
1402 "pear".to_string(),
1403 ]);
1404 let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
1405 let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
1406 let cat = CategoricalArray {
1407 data: data.into(),
1408 unique_values: unique,
1409 null_mask: Some(mask.clone()),
1410 };
1411 let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
1412
1413 let mask_out = sorted_mask.unwrap();
1415 let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
1416 assert_eq!(null_pos, &[0, 1]);
1417
1418 let sorted_as_str: Vec<_> = sorted_data
1420 .iter()
1421 .map(|&k| &cat.unique_values[k.to_usize()][..])
1422 .collect();
1423 let vals = &sorted_as_str[null_pos.len()..];
1424 assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
1425 }
1426
1427 #[test]
1428 fn test_sort_categorical_all_nulls_and_no_nulls() {
1429 let unique = Vec64::from_slice_clone(&["x".to_string()]);
1431 let data = Vec64::from_slice(&[0u8, 0, 0]);
1432 let mask = Bitmask::from_bools(&[false, false, false]);
1433 let cat = CategoricalArray {
1434 data: data.clone().into(),
1435 unique_values: unique.clone(),
1436 null_mask: Some(mask.clone()),
1437 };
1438 let (_, sorted_mask) = sort_categorical_lexical(&cat);
1439 assert_eq!(
1440 unpack_bitmask(&sorted_mask.unwrap()),
1441 vec64![false, false, false]
1442 );
1443 let mask2 = Bitmask::from_bools(&[true, true, true]);
1445 let cat2 = CategoricalArray {
1446 data: data.clone().into(),
1447 unique_values: unique,
1448 null_mask: Some(mask2.clone()),
1449 };
1450 let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
1451 assert_eq!(
1452 unpack_bitmask(&sorted_mask2.unwrap()),
1453 vec64![true, true, true]
1454 );
1455 }
1456 #[test]
1457 fn test_sort_boolean_array_with_nulls() {
1458 let mut arr = BooleanArray {
1459 data: pack_bitmask(&[false, true, false, true, true, false]),
1460 null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
1461 len: 6,
1462 _phantom: std::marker::PhantomData,
1463 };
1464 sort_boolean_array(&mut arr);
1465 let bits = unpack_bitmask(&arr.data);
1467 let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
1468 assert_eq!(nulls, vec64![false, false, true, true, true, true]);
1469 assert_eq!(&bits[2..], [false, false, false, true]);
1471 }
1472
1473 #[test]
1474 fn test_sort_boolean_array_all_true_and_all_false() {
1475 let mut arr = BooleanArray {
1476 data: pack_bitmask(&[true, true, true]),
1477 null_mask: None,
1478 len: 3,
1479 _phantom: std::marker::PhantomData,
1480 };
1481 sort_boolean_array(&mut arr);
1482 assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
1483
1484 let mut arr2 = BooleanArray {
1485 data: pack_bitmask(&[false, false, false]),
1486 null_mask: None,
1487 len: 3,
1488 _phantom: std::marker::PhantomData,
1489 };
1490 sort_boolean_array(&mut arr2);
1491 assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
1492 }
1493
1494 #[test]
1495 fn test_sort_boolean_array_all_nulls_and_none() {
1496 let mut arr = BooleanArray {
1497 data: pack_bitmask(&[true, false, true]),
1498 null_mask: Some(pack_bitmask(&[false, false, false])),
1499 len: 3,
1500 _phantom: std::marker::PhantomData,
1501 };
1502 sort_boolean_array(&mut arr);
1503 assert_eq!(
1504 unpack_bitmask(arr.null_mask.as_ref().unwrap()),
1505 vec64![false, false, false]
1506 );
1507 }
1508
1509 #[test]
1510 fn test_sort_slice_with_mask_basic() {
1511 let data = vec64![3, 1, 2, 1];
1512 let mask = pack_bitmask(&[true, false, true, true]);
1513 let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
1514 assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
1515 assert_eq!(
1516 unpack_bitmask(&mask_out.unwrap()),
1517 vec64![false, true, true, true]
1518 );
1519 }
1520
1521 #[test]
1522 fn test_sort_slice_with_mask_no_mask() {
1523 let data = vec64![3, 2, 1, 1, 0];
1524 let (sorted, mask_out) = sort_slice_with_mask(&data, None);
1525 assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
1526 assert!(mask_out.is_none());
1527 }
1528
1529 #[test]
1530 fn test_sort_slice_with_mask_all_true_and_all_false() {
1531 let data = vec64![3, 2, 1, 0];
1532 let mask_true = pack_bitmask(&[true; 4]);
1533 let mask_false = pack_bitmask(&[false; 4]);
1534 let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
1535 let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
1536 assert_eq!(
1537 unpack_bitmask(&mask_true_out.unwrap()),
1538 vec64![true, true, true, true]
1539 );
1540 assert_eq!(
1541 unpack_bitmask(&mask_false_out.unwrap()),
1542 vec64![false, false, false, false]
1543 );
1544 }
1545
1546 #[test]
1547 fn test_sort_int_with_duplicates_and_negatives() {
1548 let mut arr = vec64![-10, -1, 5, 0, 5, -10];
1549 sort_int(&mut arr);
1550 assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
1551 }
1552
1553 #[test]
1554 fn test_sort_str_empty_and_duplicate() {
1555 let mut arr = vec64!["", "a", "b", "a", ""];
1556 sort_str(&mut arr);
1557 assert_eq!(*arr, ["", "", "a", "a", "b"]);
1558 }
1559
1560 #[test]
1565 fn test_argsort_basic() {
1566 let data = [5i32, 2, 8, 1, 9];
1567 let indices = argsort(&data, false);
1568 assert_eq!(indices, vec![3, 1, 0, 2, 4]); let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1571 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1572 }
1573
1574 #[test]
1575 fn test_argsort_descending() {
1576 let data = [5i32, 2, 8, 1, 9];
1577 let indices = argsort(&data, true);
1578 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1579 assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
1580 }
1581
1582 #[test]
1583 fn test_argsort_empty() {
1584 let data: [i32; 0] = [];
1585 let indices = argsort(&data, false);
1586 assert!(indices.is_empty());
1587 }
1588
1589 #[test]
1590 fn test_argsort_float_basic() {
1591 let data = [3.0f64, 1.0, 4.0, 1.5, 2.0];
1592 let indices = argsort_float(&data, false);
1593 let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
1594 assert_eq!(sorted, vec![1.0, 1.5, 2.0, 3.0, 4.0]);
1595 }
1596
1597 #[test]
1598 fn test_argsort_float_with_nan() {
1599 let data = [3.0f64, f64::NAN, 1.0, f64::NEG_INFINITY];
1600 let indices = argsort_float(&data, false);
1601 let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
1602 assert_eq!(sorted[0], f64::NEG_INFINITY);
1604 assert_eq!(sorted[1], 1.0);
1605 assert_eq!(sorted[2], 3.0);
1606 assert!(sorted[3].is_nan());
1607 }
1608
1609 #[test]
1610 fn test_argsort_str_basic() {
1611 let data = ["cherry", "apple", "banana"];
1612 let indices = argsort_str(&data, false);
1613 let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
1614 assert_eq!(sorted, vec!["apple", "banana", "cherry"]);
1615 }
1616
1617 #[test]
1618 fn test_argsort_str_descending() {
1619 let data = ["cherry", "apple", "banana"];
1620 let indices = argsort_str(&data, true);
1621 let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
1622 assert_eq!(sorted, vec!["cherry", "banana", "apple"]);
1623 }
1624
1625 #[test]
1626 fn test_argsort_string_array_basic() {
1627 let values = "applecherrybanana";
1629 let offsets = [0usize, 5, 11, 17];
1630 let indices = argsort_string_array(&offsets, values, false);
1631 assert_eq!(indices, vec![0, 2, 1]);
1633 }
1634
1635 #[test]
1636 fn test_argsort_radix_u32_basic() {
1637 let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
1638 let indices = argsort_radix_u32(&data, false);
1639 let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
1640 assert_eq!(sorted, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
1641 }
1642
1643 #[test]
1644 fn test_argsort_radix_u32_descending() {
1645 let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
1646 let indices = argsort_radix_u32(&data, true);
1647 let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
1648 assert_eq!(sorted, vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
1649 }
1650
1651 #[test]
1652 fn test_argsort_radix_i32_with_negatives() {
1653 let data = vec![5i32, -2, 8, -1, 9, 3, -7, 4, 6, 0];
1654 let indices = argsort_radix_i32(&data, false);
1655 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1656 assert_eq!(sorted, vec![-7, -2, -1, 0, 3, 4, 5, 6, 8, 9]);
1657 }
1658
1659 #[test]
1660 fn test_argsort_radix_u64_basic() {
1661 let data = vec![100u64, 50, 200, 25];
1662 let indices = argsort_radix_u64(&data, false);
1663 let sorted: Vec<u64> = indices.iter().map(|&i| data[i]).collect();
1664 assert_eq!(sorted, vec![25, 50, 100, 200]);
1665 }
1666
1667 #[test]
1668 fn test_argsort_radix_i64_with_negatives() {
1669 let data = vec![5i64, -2, 8, -1, 0];
1670 let indices = argsort_radix_i64(&data, false);
1671 let sorted: Vec<i64> = indices.iter().map(|&i| data[i]).collect();
1672 assert_eq!(sorted, vec![-2, -1, 0, 5, 8]);
1673 }
1674
1675 #[test]
1676 fn test_argsort_boolean_basic() {
1677 use minarrow::Bitmask;
1678 let mut data = Bitmask::new_set_all(4, false);
1680 data.set(0, true);
1681 data.set(2, true);
1682
1683 let indices = argsort_boolean(&data, None, false);
1684 let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
1686 assert_eq!(sorted, vec![false, false, true, true]);
1687 }
1688
1689 #[test]
1690 fn test_argsort_boolean_descending() {
1691 use minarrow::Bitmask;
1692 let mut data = Bitmask::new_set_all(4, false);
1693 data.set(0, true);
1694 data.set(2, true);
1695
1696 let indices = argsort_boolean(&data, None, true);
1697 let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
1698 assert_eq!(sorted, vec![true, true, false, false]);
1699 }
1700
1701 #[test]
1702 fn test_argsort_boolean_with_nulls() {
1703 use minarrow::Bitmask;
1704 let mut data = Bitmask::new_set_all(4, false);
1707 data.set(0, true);
1708 data.set(2, true);
1709
1710 let mut null_mask = Bitmask::new_set_all(4, true);
1711 null_mask.set(1, false); let indices = argsort_boolean(&data, Some(&null_mask), false);
1714 assert_eq!(indices[0], 1); }
1717
1718 #[test]
1719 fn test_argsort_auto_i32_basic() {
1720 let data = [5i32, 2, 8, 1, 9];
1721 let config = ArgsortConfig::default();
1722 let indices = argsort_auto_i32(&data, &config);
1723 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1724 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1725 }
1726
1727 #[test]
1728 fn test_argsort_auto_with_comparison() {
1729 let data = [5i32, 2, 8, 1, 9];
1730 let config = ArgsortConfig::new().algorithm(SortAlgorithm::Comparison);
1731 let indices = argsort_auto_i32(&data, &config);
1732 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1733 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
1734 }
1735
1736 #[test]
1737 fn test_argsort_auto_descending() {
1738 let data = [5i32, 2, 8, 1, 9];
1739 let config = ArgsortConfig::new().descending(true);
1740 let indices = argsort_auto_i32(&data, &config);
1741 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
1742 assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
1743 }
1744
1745 fn apply_indices<T: Copy>(data: &[T], indices: &[usize]) -> Vec<T> {
1753 indices.iter().map(|&i| data[i]).collect()
1754 }
1755
1756 #[test]
1757 fn test_i32_sorts_produce_same_results() {
1758 let data = vec![
1760 5i32,
1761 -2,
1762 8,
1763 -1,
1764 9,
1765 3,
1766 -7,
1767 4,
1768 6,
1769 0,
1770 i32::MAX,
1771 i32::MIN,
1772 0,
1773 -100,
1774 100,
1775 1,
1776 1,
1777 1,
1778 -1,
1779 -1, ];
1781
1782 let comparison_asc = argsort(&data, false);
1784 let radix_asc = argsort_radix_i32(&data, false);
1785 let auto_comparison = argsort_auto_i32(
1786 &data,
1787 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1788 );
1789 let auto_radix =
1790 argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1791 let auto_default = argsort_auto_i32(&data, &ArgsortConfig::default());
1792
1793 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
1795 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
1796 let sorted_auto_comparison: Vec<i32> = apply_indices(&data, &auto_comparison);
1797 let sorted_auto_radix: Vec<i32> = apply_indices(&data, &auto_radix);
1798 let sorted_auto_default: Vec<i32> = apply_indices(&data, &auto_default);
1799
1800 assert_eq!(
1802 sorted_comparison, sorted_radix,
1803 "comparison vs radix mismatch"
1804 );
1805 assert_eq!(
1806 sorted_comparison, sorted_auto_comparison,
1807 "comparison vs auto_comparison mismatch"
1808 );
1809 assert_eq!(
1810 sorted_comparison, sorted_auto_radix,
1811 "comparison vs auto_radix mismatch"
1812 );
1813 assert_eq!(
1814 sorted_comparison, sorted_auto_default,
1815 "comparison vs auto_default mismatch"
1816 );
1817
1818 for i in 1..sorted_comparison.len() {
1820 assert!(
1821 sorted_comparison[i - 1] <= sorted_comparison[i],
1822 "not sorted at index {}",
1823 i
1824 );
1825 }
1826
1827 let comparison_desc = argsort(&data, true);
1829 let radix_desc = argsort_radix_i32(&data, true);
1830
1831 let sorted_comparison_desc: Vec<i32> = apply_indices(&data, &comparison_desc);
1832 let sorted_radix_desc: Vec<i32> = apply_indices(&data, &radix_desc);
1833
1834 assert_eq!(
1835 sorted_comparison_desc, sorted_radix_desc,
1836 "descending comparison vs radix mismatch"
1837 );
1838
1839 for i in 1..sorted_comparison_desc.len() {
1841 assert!(
1842 sorted_comparison_desc[i - 1] >= sorted_comparison_desc[i],
1843 "not sorted descending at index {}",
1844 i
1845 );
1846 }
1847 }
1848
1849 #[test]
1850 fn test_i64_sorts_produce_same_results() {
1851 let data = vec![
1852 5i64,
1853 -2,
1854 8,
1855 -1,
1856 9,
1857 3,
1858 -7,
1859 4,
1860 6,
1861 0,
1862 i64::MAX,
1863 i64::MIN,
1864 0,
1865 -100,
1866 100,
1867 i64::MAX - 1,
1868 i64::MIN + 1,
1869 ];
1870
1871 let comparison_asc = argsort(&data, false);
1872 let radix_asc = argsort_radix_i64(&data, false);
1873 let auto_comparison = argsort_auto_i64(
1874 &data,
1875 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1876 );
1877 let auto_radix =
1878 argsort_auto_i64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1879
1880 let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
1881 let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
1882 let sorted_auto_comparison: Vec<i64> = apply_indices(&data, &auto_comparison);
1883 let sorted_auto_radix: Vec<i64> = apply_indices(&data, &auto_radix);
1884
1885 assert_eq!(
1886 sorted_comparison, sorted_radix,
1887 "i64: comparison vs radix mismatch"
1888 );
1889 assert_eq!(
1890 sorted_comparison, sorted_auto_comparison,
1891 "i64: comparison vs auto_comparison mismatch"
1892 );
1893 assert_eq!(
1894 sorted_comparison, sorted_auto_radix,
1895 "i64: comparison vs auto_radix mismatch"
1896 );
1897
1898 for i in 1..sorted_comparison.len() {
1900 assert!(
1901 sorted_comparison[i - 1] <= sorted_comparison[i],
1902 "i64: not sorted at index {}",
1903 i
1904 );
1905 }
1906 }
1907
1908 #[test]
1909 fn test_u32_sorts_produce_same_results() {
1910 let data = vec![
1911 5u32,
1912 2,
1913 8,
1914 1,
1915 9,
1916 3,
1917 7,
1918 4,
1919 6,
1920 0,
1921 u32::MAX,
1922 0,
1923 100,
1924 u32::MAX - 1,
1925 1,
1926 1,
1927 1, ];
1929
1930 let comparison_asc = argsort(&data, false);
1931 let radix_asc = argsort_radix_u32(&data, false);
1932 let auto_comparison = argsort_auto_u32(
1933 &data,
1934 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1935 );
1936 let auto_radix =
1937 argsort_auto_u32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1938
1939 let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
1940 let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
1941 let sorted_auto_comparison: Vec<u32> = apply_indices(&data, &auto_comparison);
1942 let sorted_auto_radix: Vec<u32> = apply_indices(&data, &auto_radix);
1943
1944 assert_eq!(
1945 sorted_comparison, sorted_radix,
1946 "u32: comparison vs radix mismatch"
1947 );
1948 assert_eq!(
1949 sorted_comparison, sorted_auto_comparison,
1950 "u32: comparison vs auto_comparison mismatch"
1951 );
1952 assert_eq!(
1953 sorted_comparison, sorted_auto_radix,
1954 "u32: comparison vs auto_radix mismatch"
1955 );
1956
1957 for i in 1..sorted_comparison.len() {
1959 assert!(
1960 sorted_comparison[i - 1] <= sorted_comparison[i],
1961 "u32: not sorted at index {}",
1962 i
1963 );
1964 }
1965 }
1966
1967 #[test]
1968 fn test_u64_sorts_produce_same_results() {
1969 let data = vec![
1970 5u64,
1971 2,
1972 8,
1973 1,
1974 9,
1975 3,
1976 7,
1977 4,
1978 6,
1979 0,
1980 u64::MAX,
1981 0,
1982 100,
1983 u64::MAX - 1,
1984 ];
1985
1986 let comparison_asc = argsort(&data, false);
1987 let radix_asc = argsort_radix_u64(&data, false);
1988 let auto_comparison = argsort_auto_u64(
1989 &data,
1990 &ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
1991 );
1992 let auto_radix =
1993 argsort_auto_u64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
1994
1995 let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
1996 let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
1997 let sorted_auto_comparison: Vec<u64> = apply_indices(&data, &auto_comparison);
1998 let sorted_auto_radix: Vec<u64> = apply_indices(&data, &auto_radix);
1999
2000 assert_eq!(
2001 sorted_comparison, sorted_radix,
2002 "u64: comparison vs radix mismatch"
2003 );
2004 assert_eq!(
2005 sorted_comparison, sorted_auto_comparison,
2006 "u64: comparison vs auto_comparison mismatch"
2007 );
2008 assert_eq!(
2009 sorted_comparison, sorted_auto_radix,
2010 "u64: comparison vs auto_radix mismatch"
2011 );
2012
2013 for i in 1..sorted_comparison.len() {
2015 assert!(
2016 sorted_comparison[i - 1] <= sorted_comparison[i],
2017 "u64: not sorted at index {}",
2018 i
2019 );
2020 }
2021 }
2022
2023 #[cfg(feature = "simd_sort")]
2024 #[test]
2025 fn test_i32_sorts_produce_same_results_with_simd() {
2026 use super::simd_argsort;
2027
2028 let data = vec![
2029 5i32,
2030 -2,
2031 8,
2032 -1,
2033 9,
2034 3,
2035 -7,
2036 4,
2037 6,
2038 0,
2039 i32::MAX,
2040 i32::MIN,
2041 0,
2042 -100,
2043 100,
2044 1,
2045 1,
2046 1,
2047 -1,
2048 -1,
2049 ];
2050
2051 let comparison_asc = argsort(&data, false);
2052 let radix_asc = argsort_radix_i32(&data, false);
2053 let simd_asc = simd_argsort::argsort_simd_i32(&data, false);
2054 let auto_simd =
2055 argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Simd));
2056
2057 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
2058 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
2059 let sorted_simd: Vec<i32> = apply_indices(&data, &simd_asc);
2060 let sorted_auto_simd: Vec<i32> = apply_indices(&data, &auto_simd);
2061
2062 assert_eq!(
2063 sorted_comparison, sorted_radix,
2064 "simd test: comparison vs radix mismatch"
2065 );
2066 assert_eq!(
2067 sorted_comparison, sorted_simd,
2068 "simd test: comparison vs simd mismatch"
2069 );
2070 assert_eq!(
2071 sorted_comparison, sorted_auto_simd,
2072 "simd test: comparison vs auto_simd mismatch"
2073 );
2074 }
2075
2076 #[cfg(feature = "simd_sort")]
2077 #[test]
2078 fn test_i64_sorts_produce_same_results_with_simd() {
2079 use super::simd_argsort;
2080
2081 let data = vec![
2082 5i64,
2083 -2,
2084 8,
2085 -1,
2086 9,
2087 3,
2088 -7,
2089 4,
2090 6,
2091 0,
2092 i64::MAX,
2093 i64::MIN,
2094 0,
2095 -100,
2096 100,
2097 ];
2098
2099 let comparison_asc = argsort(&data, false);
2100 let radix_asc = argsort_radix_i64(&data, false);
2101 let simd_asc = simd_argsort::argsort_simd_i64(&data, false);
2102
2103 let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
2104 let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
2105 let sorted_simd: Vec<i64> = apply_indices(&data, &simd_asc);
2106
2107 assert_eq!(
2108 sorted_comparison, sorted_radix,
2109 "simd i64: comparison vs radix mismatch"
2110 );
2111 assert_eq!(
2112 sorted_comparison, sorted_simd,
2113 "simd i64: comparison vs simd mismatch"
2114 );
2115 }
2116
2117 #[cfg(feature = "simd_sort")]
2118 #[test]
2119 fn test_u32_sorts_produce_same_results_with_simd() {
2120 use super::simd_argsort;
2121
2122 let data = vec![
2123 5u32,
2124 2,
2125 8,
2126 1,
2127 9,
2128 3,
2129 7,
2130 4,
2131 6,
2132 0,
2133 u32::MAX,
2134 0,
2135 100,
2136 u32::MAX - 1,
2137 ];
2138
2139 let comparison_asc = argsort(&data, false);
2140 let radix_asc = argsort_radix_u32(&data, false);
2141 let simd_asc = simd_argsort::argsort_simd_u32(&data, false);
2142
2143 let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
2144 let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
2145 let sorted_simd: Vec<u32> = apply_indices(&data, &simd_asc);
2146
2147 assert_eq!(
2148 sorted_comparison, sorted_radix,
2149 "simd u32: comparison vs radix mismatch"
2150 );
2151 assert_eq!(
2152 sorted_comparison, sorted_simd,
2153 "simd u32: comparison vs simd mismatch"
2154 );
2155 }
2156
2157 #[cfg(feature = "simd_sort")]
2158 #[test]
2159 fn test_u64_sorts_produce_same_results_with_simd() {
2160 use super::simd_argsort;
2161
2162 let data = vec![
2163 5u64,
2164 2,
2165 8,
2166 1,
2167 9,
2168 3,
2169 7,
2170 4,
2171 6,
2172 0,
2173 u64::MAX,
2174 0,
2175 100,
2176 u64::MAX - 1,
2177 ];
2178
2179 let comparison_asc = argsort(&data, false);
2180 let radix_asc = argsort_radix_u64(&data, false);
2181 let simd_asc = simd_argsort::argsort_simd_u64(&data, false);
2182
2183 let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
2184 let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
2185 let sorted_simd: Vec<u64> = apply_indices(&data, &simd_asc);
2186
2187 assert_eq!(
2188 sorted_comparison, sorted_radix,
2189 "simd u64: comparison vs radix mismatch"
2190 );
2191 assert_eq!(
2192 sorted_comparison, sorted_simd,
2193 "simd u64: comparison vs simd mismatch"
2194 );
2195 }
2196
2197 #[test]
2199 fn test_numeric_sorts_produce_same_results_large_dataset() {
2200 fn lcg_next(state: &mut u64) -> i32 {
2202 *state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
2203 (*state >> 33) as i32
2204 }
2205
2206 let mut state = 12345u64;
2207 let data: Vec<i32> = (0..10_000).map(|_| lcg_next(&mut state)).collect();
2208
2209 let comparison_asc = argsort(&data, false);
2210 let radix_asc = argsort_radix_i32(&data, false);
2211
2212 let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
2213 let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
2214
2215 assert_eq!(
2216 sorted_comparison, sorted_radix,
2217 "large dataset: comparison vs radix mismatch"
2218 );
2219
2220 for i in 1..sorted_comparison.len() {
2222 assert!(
2223 sorted_comparison[i - 1] <= sorted_comparison[i],
2224 "large dataset: not sorted at index {}",
2225 i
2226 );
2227 }
2228 }
2229
2230 #[test]
2235 fn test_stable_sort_preserves_relative_order() {
2236 let data = [3i32, 1, 2, 1];
2238 let indices = argsort_with_stability(&data, false, true);
2239 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
2245
2246 #[test]
2247 fn test_stable_float_preserves_relative_order() {
2248 let data = [3.0f64, 1.0, 2.0, 1.0];
2250 let indices = argsort_float_with_stability(&data, false, true);
2251 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
2256
2257 #[test]
2258 fn test_argsort_config_stable_default_true() {
2259 let config = ArgsortConfig::default();
2260 assert!(config.stable);
2261 }
2262
2263 #[test]
2264 fn test_argsort_auto_stable_preserves_order() {
2265 let data = [3i32, 1, 2, 1];
2266 let config = ArgsortConfig::new(); let indices = argsort_auto_i32(&data, &config);
2268 assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); }
2272
2273 #[test]
2274 fn test_argsort_auto_unstable_still_sorts_correctly() {
2275 let data = [5i32, 2, 8, 1, 9];
2276 let config = ArgsortConfig::new().stable(false);
2277 let indices = argsort_auto_i32(&data, &config);
2278 let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
2279 assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
2280 }
2281 }
2282}