use std::cmp::Ordering;
use minarrow::{Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64};
use num_traits::{Float, NumCast, Zero};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SortAlgorithm {
#[default]
Auto,
Comparison,
Radix,
#[cfg(feature = "simd_sort")]
Simd,
}
#[derive(Debug, Clone)]
pub struct ArgsortConfig {
pub descending: bool,
pub algorithm: SortAlgorithm,
pub stable: bool,
pub parallel: bool,
}
impl Default for ArgsortConfig {
fn default() -> Self {
Self {
descending: false,
algorithm: SortAlgorithm::default(),
stable: true,
parallel: false,
}
}
}
impl ArgsortConfig {
pub fn new() -> Self {
Self::default()
}
pub fn descending(mut self, descending: bool) -> Self {
self.descending = descending;
self
}
pub fn algorithm(mut self, algorithm: SortAlgorithm) -> Self {
self.algorithm = algorithm;
self
}
pub fn stable(mut self, stable: bool) -> Self {
self.stable = stable;
self
}
pub fn parallel(mut self, parallel: bool) -> Self {
self.parallel = parallel;
self
}
}
#[inline]
pub fn sort_float<T: Float>(slice: &mut [T]) {
slice.sort_unstable_by(total_cmp_f);
}
#[inline(always)]
pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
if std::mem::size_of::<T>() == 4 {
let ua = unsafe { *(a as *const T as *const u32) };
let ub = unsafe { *(b as *const T as *const u32) };
let ka = if ua & 0x8000_0000 != 0 {
!ua
} else {
ua ^ 0x8000_0000
};
let kb = if ub & 0x8000_0000 != 0 {
!ub
} else {
ub ^ 0x8000_0000
};
ka.cmp(&kb)
} else if std::mem::size_of::<T>() == 8 {
let ua = unsafe { *(a as *const T as *const u64) };
let ub = unsafe { *(b as *const T as *const u64) };
let ka = if ua & 0x8000_0000_0000_0000 != 0 {
!ua
} else {
ua ^ 0x8000_0000_0000_0000
};
let kb = if ub & 0x8000_0000_0000_0000 != 0 {
!ub
} else {
ub ^ 0x8000_0000_0000_0000
};
ka.cmp(&kb)
} else {
unreachable!("Only f32 and f64 are valid Float types.")
}
}
#[inline]
pub fn sorted_float_to<T: Float>(data: &[T], out: &mut [T]) {
assert_eq!(
data.len(),
out.len(),
"sorted_float_to: input/output length mismatch"
);
out.copy_from_slice(data);
sort_float(out);
}
pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
let mut v = Vec64::from_slice(data);
sort_float(&mut v);
v
}
#[inline]
pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
slice.sort_unstable();
}
#[inline]
pub fn sorted_int_to<T: Ord + Copy>(data: &[T], out: &mut [T]) {
assert_eq!(
data.len(),
out.len(),
"sorted_int_to: input/output length mismatch"
);
out.copy_from_slice(data);
sort_int(out);
}
pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
let mut v = Vec64::from_slice(data);
sort_int(&mut v);
v
}
#[inline]
pub fn sort_str(slice: &mut [&str]) {
slice.sort_unstable();
}
pub fn sorted_str(data: &[&str]) -> Vec64<String> {
let mut v = Vec64::from_slice(data);
sort_str(&mut v);
v.iter().map(|s| s.to_string()).collect()
}
pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
let n = offsets.len() - 1;
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_unstable_by(|&i, &j| {
let a = &values[offsets[i]..offsets[i + 1]];
let b = &values[offsets[j]..offsets[j + 1]];
a.cmp(b)
});
let total_bytes: usize = indices
.iter()
.map(|&idx| offsets[idx + 1] - offsets[idx])
.sum();
let mut new_values = String::with_capacity(total_bytes);
let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
unsafe {
new_offsets.set_len(n + 1);
}
unsafe {
new_values.as_mut_vec().set_len(total_bytes);
}
let values_bytes = values.as_bytes();
let out_bytes = unsafe { new_values.as_mut_vec() };
unsafe {
*new_offsets.get_unchecked_mut(0) = 0;
}
let mut current_offset = 0;
let mut out_ptr = 0;
for (i, &idx) in indices.iter().enumerate() {
let start = offsets[idx];
let end = offsets[idx + 1];
let len = end - start;
unsafe {
std::ptr::copy_nonoverlapping(
values_bytes.as_ptr().add(start),
out_bytes.as_mut_ptr().add(out_ptr),
len,
);
current_offset += len;
*new_offsets.get_unchecked_mut(i + 1) = current_offset;
}
out_ptr += len;
}
unsafe {
new_values.as_mut_vec().set_len(current_offset);
}
(new_offsets, new_values)
}
pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
cat: &CategoricalArray<T>,
) -> (Vec64<T>, Option<Bitmask>) {
let len = cat.data.len();
let mut indices: Vec<usize> = (0..len).collect();
indices.sort_by(|&i, &j| {
let a_is_null = cat.is_null(i);
let b_is_null = cat.is_null(j);
match (a_is_null, b_is_null) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
(false, false) => {
let a_key = &cat.unique_values[cat.data[i].to_usize()];
let b_key = &cat.unique_values[cat.data[j].to_usize()];
a_key.cmp(b_key)
}
}
});
let mut sorted_data = Vec64::<T>::with_capacity(len);
let mut sorted_mask = cat
.null_mask
.as_ref()
.map(|_| Bitmask::new_set_all(len, false));
for (out_i, &orig_i) in indices.iter().enumerate() {
sorted_data.push(cat.data[orig_i]);
if let Some(ref mask) = cat.null_mask {
if let Some(ref mut sm) = sorted_mask {
if unsafe { mask.get_unchecked(orig_i) } {
unsafe { sm.set_unchecked(out_i, true) };
}
}
}
}
(sorted_data, sorted_mask)
}
#[inline]
fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
let mut out = Vec64::with_capacity(mask.len);
for i in 0..mask.len {
out.push(unsafe { mask.get_unchecked(i) });
}
out
}
#[inline]
fn pack_bitmask(bits: &[bool]) -> Bitmask {
let mut mask = Bitmask::new_set_all(bits.len(), false);
for (i, &b) in bits.iter().enumerate() {
if b {
unsafe { mask.set_unchecked(i, true) };
}
}
mask
}
pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
let bits: Vec64<bool> = unpack_bitmask(&arr.data);
let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
let mut indices: Vec<usize> = (0..arr.len).collect();
indices.sort_unstable_by(|&i, &j| {
let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
match (a_null, b_null) {
(true, true) => Ordering::Equal, (true, false) => Ordering::Less, (false, true) => Ordering::Greater, (false, false) => {
match (bits[i], bits[j]) {
(false, false) => Ordering::Equal,
(false, true) => Ordering::Less,
(true, false) => Ordering::Greater,
(true, true) => Ordering::Equal,
}
}
}
});
let sorted_bits: Vec<bool> = indices
.iter()
.map(|&idx| {
let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
if is_null { false } else { bits[idx] }
})
.collect();
arr.data = pack_bitmask(&sorted_bits);
if let Some(null_mask) = arr.null_mask.as_mut() {
let sorted_nulls: Vec<bool> = indices
.iter()
.map(|&idx| nulls.as_ref().unwrap()[idx])
.collect();
*null_mask = pack_bitmask(&sorted_nulls);
}
}
pub fn sort_slice_with_mask<T: Ord + Copy>(
data: &[T],
mask: Option<&Bitmask>,
) -> (Vec64<T>, Option<Bitmask>) {
let n = data.len();
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
let mut sorted_data = Vec64::<T>::with_capacity(n);
unsafe {
sorted_data.set_len(n);
}
for (i, &idx) in indices.iter().enumerate() {
unsafe {
*sorted_data.get_unchecked_mut(i) = data[idx];
}
}
let sorted_mask = mask.map(|m| {
let mut out = Bitmask::new_set_all(n, false);
for (i, &idx) in indices.iter().enumerate() {
if unsafe { m.get_unchecked(idx) } {
unsafe { out.set_unchecked(i, true) };
}
}
out
});
(sorted_data, sorted_mask)
}
#[inline]
pub fn argsort<T: Ord>(data: &[T], descending: bool) -> Vec<usize> {
argsort_with_stability(data, descending, false)
}
#[inline]
pub fn argsort_with_stability<T: Ord>(data: &[T], descending: bool, stable: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
if stable {
if descending {
indices.sort_by(|&i, &j| data[j].cmp(&data[i]));
} else {
indices.sort_by(|&i, &j| data[i].cmp(&data[j]));
}
} else {
if descending {
indices.sort_unstable_by(|&i, &j| data[j].cmp(&data[i]));
} else {
indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
}
}
indices
}
#[inline]
pub fn argsort_float<T: Float>(data: &[T], descending: bool) -> Vec<usize> {
argsort_float_with_stability(data, descending, false)
}
#[inline]
pub fn argsort_float_with_stability<T: Float>(
data: &[T],
descending: bool,
stable: bool,
) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
if stable {
if descending {
indices.sort_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
} else {
indices.sort_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
}
} else {
if descending {
indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[j], &data[i]));
} else {
indices.sort_unstable_by(|&i, &j| total_cmp_f(&data[i], &data[j]));
}
}
indices
}
#[inline]
pub fn argsort_str(data: &[&str], descending: bool) -> Vec<usize> {
argsort_str_with_stability(data, descending, false)
}
#[inline]
pub fn argsort_str_with_stability(data: &[&str], descending: bool, stable: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
if stable {
if descending {
indices.sort_by(|&i, &j| data[j].cmp(data[i]));
} else {
indices.sort_by(|&i, &j| data[i].cmp(data[j]));
}
} else {
if descending {
indices.sort_unstable_by(|&i, &j| data[j].cmp(data[i]));
} else {
indices.sort_unstable_by(|&i, &j| data[i].cmp(data[j]));
}
}
indices
}
pub fn argsort_string_array(offsets: &[usize], values: &str, descending: bool) -> Vec<usize> {
argsort_string_array_with_stability(offsets, values, descending, false)
}
pub fn argsort_string_array_with_stability(
offsets: &[usize],
values: &str,
descending: bool,
stable: bool,
) -> Vec<usize> {
let n = if offsets.is_empty() {
0
} else {
offsets.len() - 1
};
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
let cmp = |i: &usize, j: &usize| {
let a = &values[offsets[*i]..offsets[*i + 1]];
let b = &values[offsets[*j]..offsets[*j + 1]];
if descending { b.cmp(a) } else { a.cmp(b) }
};
if stable {
indices.sort_by(cmp);
} else {
indices.sort_unstable_by(cmp);
}
indices
}
pub fn argsort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
cat: &CategoricalArray<T>,
descending: bool,
) -> Vec<usize> {
let len = cat.data.len();
if len == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..len).collect();
indices.sort_by(|&i, &j| {
let a_is_null = cat.is_null(i);
let b_is_null = cat.is_null(j);
let ordering = match (a_is_null, b_is_null) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Less, (false, true) => Ordering::Greater,
(false, false) => {
let a_key = &cat.unique_values[cat.data[i].to_usize()];
let b_key = &cat.unique_values[cat.data[j].to_usize()];
a_key.cmp(b_key)
}
};
if descending {
ordering.reverse()
} else {
ordering
}
});
indices
}
pub fn argsort_categorical_custom<T: Ord + Copy + Zero + NumCast + Integer>(
cat: &CategoricalArray<T>,
category_order: &[&str],
descending: bool,
) -> Vec<usize> {
let len = cat.data.len();
if len == 0 {
return vec![];
}
use std::collections::HashMap;
let order_map: HashMap<&str, usize> = category_order
.iter()
.enumerate()
.map(|(i, &s)| (s, i))
.collect();
let mut indices: Vec<usize> = (0..len).collect();
indices.sort_by(|&i, &j| {
let a_is_null = cat.is_null(i);
let b_is_null = cat.is_null(j);
let ordering = match (a_is_null, b_is_null) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
(false, false) => {
let a_key = &cat.unique_values[cat.data[i].to_usize()];
let b_key = &cat.unique_values[cat.data[j].to_usize()];
let a_pos = order_map.get(a_key.as_str());
let b_pos = order_map.get(b_key.as_str());
match (a_pos, b_pos) {
(Some(ap), Some(bp)) => ap.cmp(bp),
(Some(_), None) => Ordering::Less, (None, Some(_)) => Ordering::Greater,
(None, None) => a_key.cmp(b_key), }
}
};
if descending {
ordering.reverse()
} else {
ordering
}
});
indices
}
pub fn argsort_boolean(
data: &Bitmask,
null_mask: Option<&Bitmask>,
descending: bool,
) -> Vec<usize> {
let n = data.len;
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_unstable_by(|&i, &j| {
let a_null = null_mask.map_or(false, |m| !m.get(i));
let b_null = null_mask.map_or(false, |m| !m.get(j));
let ordering = match (a_null, b_null) {
(true, true) => Ordering::Equal,
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
(false, false) => {
let a_val = data.get(i);
let b_val = data.get(j);
a_val.cmp(&b_val)
}
};
if descending {
ordering.reverse()
} else {
ordering
}
});
indices
}
pub fn argsort_radix_u32(data: &[u32], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
let mut temp_indices = vec![0usize; n];
for shift in (0..32).step_by(8) {
let mut counts = [0usize; 256];
for &idx in &indices {
let byte = ((data[idx] >> shift) & 0xFF) as usize;
counts[byte] += 1;
}
let mut positions = [0usize; 256];
if descending {
let mut sum = 0;
for i in (0..256).rev() {
positions[i] = sum;
sum += counts[i];
}
} else {
let mut sum = 0;
for i in 0..256 {
positions[i] = sum;
sum += counts[i];
}
}
for &idx in &indices {
let byte = ((data[idx] >> shift) & 0xFF) as usize;
temp_indices[positions[byte]] = idx;
positions[byte] += 1;
}
std::mem::swap(&mut indices, &mut temp_indices);
}
indices
}
pub fn argsort_radix_u64(data: &[u64], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indices: Vec<usize> = (0..n).collect();
let mut temp_indices = vec![0usize; n];
for shift in (0..64).step_by(8) {
let mut counts = [0usize; 256];
for &idx in &indices {
let byte = ((data[idx] >> shift) & 0xFF) as usize;
counts[byte] += 1;
}
let mut positions = [0usize; 256];
if descending {
let mut sum = 0;
for i in (0..256).rev() {
positions[i] = sum;
sum += counts[i];
}
} else {
let mut sum = 0;
for i in 0..256 {
positions[i] = sum;
sum += counts[i];
}
}
for &idx in &indices {
let byte = ((data[idx] >> shift) & 0xFF) as usize;
temp_indices[positions[byte]] = idx;
positions[byte] += 1;
}
std::mem::swap(&mut indices, &mut temp_indices);
}
indices
}
pub fn argsort_radix_i32(data: &[i32], descending: bool) -> Vec<usize> {
let unsigned: Vec<u32> = data.iter().map(|&x| (x as u32) ^ 0x8000_0000).collect();
argsort_radix_u32(&unsigned, descending)
}
pub fn argsort_radix_i64(data: &[i64], descending: bool) -> Vec<usize> {
let unsigned: Vec<u64> = data
.iter()
.map(|&x| (x as u64) ^ 0x8000_0000_0000_0000)
.collect();
argsort_radix_u64(&unsigned, descending)
}
#[cfg(feature = "simd_sort")]
pub mod simd_argsort {
use std::cmp::Ordering;
use voracious_radix_sort::{RadixSort, Radixable};
#[derive(Copy, Clone, Debug)]
struct IndexedU32 {
value: u32,
index: usize,
}
impl PartialOrd for IndexedU32 {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.value.partial_cmp(&other.value)
}
}
impl PartialEq for IndexedU32 {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Radixable<u32> for IndexedU32 {
type Key = u32;
#[inline]
fn key(&self) -> Self::Key {
self.value
}
}
#[derive(Copy, Clone, Debug)]
struct IndexedU64 {
value: u64,
index: usize,
}
impl PartialOrd for IndexedU64 {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.value.partial_cmp(&other.value)
}
}
impl PartialEq for IndexedU64 {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Radixable<u64> for IndexedU64 {
type Key = u64;
#[inline]
fn key(&self) -> Self::Key {
self.value
}
}
#[derive(Copy, Clone, Debug)]
struct IndexedI32 {
value: i32,
index: usize,
}
impl PartialOrd for IndexedI32 {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.value.partial_cmp(&other.value)
}
}
impl PartialEq for IndexedI32 {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Radixable<i32> for IndexedI32 {
type Key = i32;
#[inline]
fn key(&self) -> Self::Key {
self.value
}
}
#[derive(Copy, Clone, Debug)]
struct IndexedI64 {
value: i64,
index: usize,
}
impl PartialOrd for IndexedI64 {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.value.partial_cmp(&other.value)
}
}
impl PartialEq for IndexedI64 {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}
impl Radixable<i64> for IndexedI64 {
type Key = i64;
#[inline]
fn key(&self) -> Self::Key {
self.value
}
}
pub fn argsort_simd_u32(data: &[u32], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indexed: Vec<IndexedU32> = data
.iter()
.enumerate()
.map(|(index, &value)| IndexedU32 { value, index })
.collect();
indexed.voracious_sort();
if descending {
indexed.iter().rev().map(|x| x.index).collect()
} else {
indexed.iter().map(|x| x.index).collect()
}
}
pub fn argsort_simd_u64(data: &[u64], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indexed: Vec<IndexedU64> = data
.iter()
.enumerate()
.map(|(index, &value)| IndexedU64 { value, index })
.collect();
indexed.voracious_sort();
if descending {
indexed.iter().rev().map(|x| x.index).collect()
} else {
indexed.iter().map(|x| x.index).collect()
}
}
pub fn argsort_simd_i32(data: &[i32], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indexed: Vec<IndexedI32> = data
.iter()
.enumerate()
.map(|(index, &value)| IndexedI32 { value, index })
.collect();
indexed.voracious_sort();
if descending {
indexed.iter().rev().map(|x| x.index).collect()
} else {
indexed.iter().map(|x| x.index).collect()
}
}
pub fn argsort_simd_i64(data: &[i64], descending: bool) -> Vec<usize> {
let n = data.len();
if n == 0 {
return vec![];
}
let mut indexed: Vec<IndexedI64> = data
.iter()
.enumerate()
.map(|(index, &value)| IndexedI64 { value, index })
.collect();
indexed.voracious_sort();
if descending {
indexed.iter().rev().map(|x| x.index).collect()
} else {
indexed.iter().map(|x| x.index).collect()
}
}
}
pub fn argsort_auto_i32(data: &[i32], config: &ArgsortConfig) -> Vec<usize> {
match config.algorithm {
SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
SortAlgorithm::Radix => argsort_radix_i32(data, config.descending),
#[cfg(feature = "simd_sort")]
SortAlgorithm::Simd => simd_argsort::argsort_simd_i32(data, config.descending),
SortAlgorithm::Auto => {
if config.stable {
return argsort_with_stability(data, config.descending, true);
}
#[cfg(feature = "simd_sort")]
{
simd_argsort::argsort_simd_i32(data, config.descending)
}
#[cfg(not(feature = "simd_sort"))]
{
argsort_radix_i32(data, config.descending)
}
}
}
}
pub fn argsort_auto_i64(data: &[i64], config: &ArgsortConfig) -> Vec<usize> {
match config.algorithm {
SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
SortAlgorithm::Radix => argsort_radix_i64(data, config.descending),
#[cfg(feature = "simd_sort")]
SortAlgorithm::Simd => simd_argsort::argsort_simd_i64(data, config.descending),
SortAlgorithm::Auto => {
if config.stable {
return argsort_with_stability(data, config.descending, true);
}
#[cfg(feature = "simd_sort")]
{
simd_argsort::argsort_simd_i64(data, config.descending)
}
#[cfg(not(feature = "simd_sort"))]
{
argsort_radix_i64(data, config.descending)
}
}
}
}
pub fn argsort_auto_u32(data: &[u32], config: &ArgsortConfig) -> Vec<usize> {
match config.algorithm {
SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
SortAlgorithm::Radix => argsort_radix_u32(data, config.descending),
#[cfg(feature = "simd_sort")]
SortAlgorithm::Simd => simd_argsort::argsort_simd_u32(data, config.descending),
SortAlgorithm::Auto => {
if config.stable {
return argsort_with_stability(data, config.descending, true);
}
#[cfg(feature = "simd_sort")]
{
simd_argsort::argsort_simd_u32(data, config.descending)
}
#[cfg(not(feature = "simd_sort"))]
{
argsort_radix_u32(data, config.descending)
}
}
}
}
pub fn argsort_auto_u64(data: &[u64], config: &ArgsortConfig) -> Vec<usize> {
match config.algorithm {
SortAlgorithm::Comparison => argsort_with_stability(data, config.descending, config.stable),
SortAlgorithm::Radix => argsort_radix_u64(data, config.descending),
#[cfg(feature = "simd_sort")]
SortAlgorithm::Simd => simd_argsort::argsort_simd_u64(data, config.descending),
SortAlgorithm::Auto => {
if config.stable {
return argsort_with_stability(data, config.descending, true);
}
#[cfg(feature = "simd_sort")]
{
simd_argsort::argsort_simd_u64(data, config.descending)
}
#[cfg(not(feature = "simd_sort"))]
{
argsort_radix_u64(data, config.descending)
}
}
}
}
#[cfg(test)]
mod tests {
use minarrow::vec64;
use super::*;
#[test]
fn test_total_cmp_f32_ordering_basic() {
let a = 1.0f32;
let b = 2.0f32;
assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
}
#[test]
fn test_total_cmp_f64_ordering_basic() {
let a = 1.0f64;
let b = 2.0f64;
assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
}
#[test]
fn test_total_cmp_zero_and_negzero() {
let pz = 0.0f32;
let nz = -0.0f32;
assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
}
#[test]
fn test_total_cmp_nan_ordering_f32() {
let nan = f32::NAN;
let pos_inf = f32::INFINITY;
let neg_inf = f32::NEG_INFINITY;
let one = 1.0f32;
assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
}
#[test]
fn test_total_cmp_nan_ordering_f64() {
let nan = f64::NAN;
let pos_inf = f64::INFINITY;
let neg_inf = f64::NEG_INFINITY;
let one = 1.0f64;
assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
}
#[test]
fn test_sort_float_f32_all_edge_cases() {
let mut v = vec64![
3.0f32,
-0.0,
0.0,
f32::INFINITY,
f32::NEG_INFINITY,
1.0,
-1.0,
f32::NAN,
2.0,
-2.0,
];
sort_float(&mut v);
assert_eq!(v[0], f32::NEG_INFINITY);
assert_eq!(v[1], -2.0);
assert_eq!(v[2], -1.0);
assert_eq!(v[3], -0.0);
assert_eq!(v[4], 0.0);
assert_eq!(v[5], 1.0);
assert_eq!(v[6], 2.0);
assert_eq!(v[7], 3.0);
assert_eq!(v[8], f32::INFINITY);
assert!(v[9].is_nan());
}
#[test]
fn test_sort_float_f64_all_edge_cases() {
let mut v = vec64![
3.0f64,
-0.0,
0.0,
f64::INFINITY,
f64::NEG_INFINITY,
1.0,
-1.0,
f64::NAN,
2.0,
-2.0,
];
sort_float(&mut v);
assert_eq!(v[0], f64::NEG_INFINITY);
assert_eq!(v[1], -2.0);
assert_eq!(v[2], -1.0);
assert_eq!(v[3], -0.0);
assert_eq!(v[4], 0.0);
assert_eq!(v[5], 1.0);
assert_eq!(v[6], 2.0);
assert_eq!(v[7], 3.0);
assert_eq!(v[8], f64::INFINITY);
assert!(v[9].is_nan());
}
#[test]
fn test_sorted_float_immutability_and_return_type() {
let v = vec64![1.0f32, 2.0, 3.0];
let out = sorted_float(&v);
assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
assert_eq!(*v, [1.0, 2.0, 3.0]);
}
#[test]
fn test_sorted_float_correct_for_f64() {
let v = vec64![3.0f64, 2.0, 1.0];
let out = sorted_float(&v);
assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
}
#[test]
fn test_sort_float_empty_and_single() {
let mut v: [f32; 0] = [];
sort_float(&mut v);
let mut v2 = [42.0f32];
sort_float(&mut v2);
assert_eq!(v2, [42.0]);
}
#[cfg(test)]
mod tests {
use super::*;
use minarrow::{Vec64, vec64};
#[test]
fn test_sort_int_empty_and_single() {
let mut arr: [i32; 0] = [];
sort_int(&mut arr);
assert_eq!(arr, [] as [i32; 0]);
let mut arr2 = vec64![42];
sort_int(&mut arr2);
assert_eq!(*arr2, [42]);
}
#[test]
fn test_sort_int_order() {
let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
sort_int(&mut arr);
assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
}
#[test]
fn test_sorted_int_immutability_and_output() {
let arr = vec64![5, 3, 7, 1, 2];
let sorted = sorted_int(&arr);
assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
assert_eq!(*arr, [5, 3, 7, 1, 2]);
}
#[test]
fn test_sort_str_basic() {
let mut arr = vec64!["z", "b", "a", "d"];
sort_str(&mut arr);
assert_eq!(*arr, ["a", "b", "d", "z"]);
}
#[test]
fn test_sorted_str_and_non_ascii() {
let arr = vec64!["z", "ä", "b", "a", "d"];
let sorted = sorted_str(&arr);
assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
}
#[test]
fn test_sort_string_array_basic() {
let offsets = vec![0, 1, 3, 5, 5, 6]; let values = "abcde".to_string() + "f";
let (new_offsets, new_values) = sort_string_array(&offsets, &values);
let slices: Vec<_> = new_offsets
.windows(2)
.map(|w| &new_values[w[0]..w[1]])
.collect();
assert_eq!(slices, &["", "a", "bc", "de", "f"]);
}
#[test]
fn test_sort_categorical_lexical_basic_and_nulls() {
let unique = Vec64::from_slice_clone(&[
"apple".to_string(),
"banana".to_string(),
"pear".to_string(),
]);
let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
let cat = CategoricalArray {
data: data.into(),
unique_values: unique,
null_mask: Some(mask.clone()),
};
let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
let mask_out = sorted_mask.unwrap();
let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
assert_eq!(null_pos, &[0, 1]);
let sorted_as_str: Vec<_> = sorted_data
.iter()
.map(|&k| &cat.unique_values[k.to_usize()][..])
.collect();
let vals = &sorted_as_str[null_pos.len()..];
assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
}
#[test]
fn test_sort_categorical_all_nulls_and_no_nulls() {
let unique = Vec64::from_slice_clone(&["x".to_string()]);
let data = Vec64::from_slice(&[0u8, 0, 0]);
let mask = Bitmask::from_bools(&[false, false, false]);
let cat = CategoricalArray {
data: data.clone().into(),
unique_values: unique.clone(),
null_mask: Some(mask.clone()),
};
let (_, sorted_mask) = sort_categorical_lexical(&cat);
assert_eq!(
unpack_bitmask(&sorted_mask.unwrap()),
vec64![false, false, false]
);
let mask2 = Bitmask::from_bools(&[true, true, true]);
let cat2 = CategoricalArray {
data: data.clone().into(),
unique_values: unique,
null_mask: Some(mask2.clone()),
};
let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
assert_eq!(
unpack_bitmask(&sorted_mask2.unwrap()),
vec64![true, true, true]
);
}
#[test]
fn test_sort_boolean_array_with_nulls() {
let mut arr = BooleanArray {
data: pack_bitmask(&[false, true, false, true, true, false]),
null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
len: 6,
_phantom: std::marker::PhantomData,
};
sort_boolean_array(&mut arr);
let bits = unpack_bitmask(&arr.data);
let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
assert_eq!(nulls, vec64![false, false, true, true, true, true]);
assert_eq!(&bits[2..], [false, false, false, true]);
}
#[test]
fn test_sort_boolean_array_all_true_and_all_false() {
let mut arr = BooleanArray {
data: pack_bitmask(&[true, true, true]),
null_mask: None,
len: 3,
_phantom: std::marker::PhantomData,
};
sort_boolean_array(&mut arr);
assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
let mut arr2 = BooleanArray {
data: pack_bitmask(&[false, false, false]),
null_mask: None,
len: 3,
_phantom: std::marker::PhantomData,
};
sort_boolean_array(&mut arr2);
assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
}
#[test]
fn test_sort_boolean_array_all_nulls_and_none() {
let mut arr = BooleanArray {
data: pack_bitmask(&[true, false, true]),
null_mask: Some(pack_bitmask(&[false, false, false])),
len: 3,
_phantom: std::marker::PhantomData,
};
sort_boolean_array(&mut arr);
assert_eq!(
unpack_bitmask(arr.null_mask.as_ref().unwrap()),
vec64![false, false, false]
);
}
#[test]
fn test_sort_slice_with_mask_basic() {
let data = vec64![3, 1, 2, 1];
let mask = pack_bitmask(&[true, false, true, true]);
let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
assert_eq!(
unpack_bitmask(&mask_out.unwrap()),
vec64![false, true, true, true]
);
}
#[test]
fn test_sort_slice_with_mask_no_mask() {
let data = vec64![3, 2, 1, 1, 0];
let (sorted, mask_out) = sort_slice_with_mask(&data, None);
assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
assert!(mask_out.is_none());
}
#[test]
fn test_sort_slice_with_mask_all_true_and_all_false() {
let data = vec64![3, 2, 1, 0];
let mask_true = pack_bitmask(&[true; 4]);
let mask_false = pack_bitmask(&[false; 4]);
let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
assert_eq!(
unpack_bitmask(&mask_true_out.unwrap()),
vec64![true, true, true, true]
);
assert_eq!(
unpack_bitmask(&mask_false_out.unwrap()),
vec64![false, false, false, false]
);
}
#[test]
fn test_sort_int_with_duplicates_and_negatives() {
let mut arr = vec64![-10, -1, 5, 0, 5, -10];
sort_int(&mut arr);
assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
}
#[test]
fn test_sort_str_empty_and_duplicate() {
let mut arr = vec64!["", "a", "b", "a", ""];
sort_str(&mut arr);
assert_eq!(*arr, ["", "", "a", "a", "b"]);
}
#[test]
fn test_argsort_basic() {
let data = [5i32, 2, 8, 1, 9];
let indices = argsort(&data, false);
assert_eq!(indices, vec![3, 1, 0, 2, 4]);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
}
#[test]
fn test_argsort_descending() {
let data = [5i32, 2, 8, 1, 9];
let indices = argsort(&data, true);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
}
#[test]
fn test_argsort_empty() {
let data: [i32; 0] = [];
let indices = argsort(&data, false);
assert!(indices.is_empty());
}
#[test]
fn test_argsort_float_basic() {
let data = [3.0f64, 1.0, 4.0, 1.5, 2.0];
let indices = argsort_float(&data, false);
let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![1.0, 1.5, 2.0, 3.0, 4.0]);
}
#[test]
fn test_argsort_float_with_nan() {
let data = [3.0f64, f64::NAN, 1.0, f64::NEG_INFINITY];
let indices = argsort_float(&data, false);
let sorted: Vec<f64> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted[0], f64::NEG_INFINITY);
assert_eq!(sorted[1], 1.0);
assert_eq!(sorted[2], 3.0);
assert!(sorted[3].is_nan());
}
#[test]
fn test_argsort_str_basic() {
let data = ["cherry", "apple", "banana"];
let indices = argsort_str(&data, false);
let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec!["apple", "banana", "cherry"]);
}
#[test]
fn test_argsort_str_descending() {
let data = ["cherry", "apple", "banana"];
let indices = argsort_str(&data, true);
let sorted: Vec<&str> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec!["cherry", "banana", "apple"]);
}
#[test]
fn test_argsort_string_array_basic() {
let values = "applecherrybanana";
let offsets = [0usize, 5, 11, 17];
let indices = argsort_string_array(&offsets, values, false);
assert_eq!(indices, vec![0, 2, 1]);
}
#[test]
fn test_argsort_radix_u32_basic() {
let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
let indices = argsort_radix_u32(&data, false);
let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
}
#[test]
fn test_argsort_radix_u32_descending() {
let data = vec![5u32, 2, 8, 1, 9, 3, 7, 4, 6, 0];
let indices = argsort_radix_u32(&data, true);
let sorted: Vec<u32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
}
#[test]
fn test_argsort_radix_i32_with_negatives() {
let data = vec![5i32, -2, 8, -1, 9, 3, -7, 4, 6, 0];
let indices = argsort_radix_i32(&data, false);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![-7, -2, -1, 0, 3, 4, 5, 6, 8, 9]);
}
#[test]
fn test_argsort_radix_u64_basic() {
let data = vec![100u64, 50, 200, 25];
let indices = argsort_radix_u64(&data, false);
let sorted: Vec<u64> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![25, 50, 100, 200]);
}
#[test]
fn test_argsort_radix_i64_with_negatives() {
let data = vec![5i64, -2, 8, -1, 0];
let indices = argsort_radix_i64(&data, false);
let sorted: Vec<i64> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![-2, -1, 0, 5, 8]);
}
#[test]
fn test_argsort_boolean_basic() {
use minarrow::Bitmask;
let mut data = Bitmask::new_set_all(4, false);
data.set(0, true);
data.set(2, true);
let indices = argsort_boolean(&data, None, false);
let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
assert_eq!(sorted, vec![false, false, true, true]);
}
#[test]
fn test_argsort_boolean_descending() {
use minarrow::Bitmask;
let mut data = Bitmask::new_set_all(4, false);
data.set(0, true);
data.set(2, true);
let indices = argsort_boolean(&data, None, true);
let sorted: Vec<bool> = indices.iter().map(|&i| data.get(i)).collect();
assert_eq!(sorted, vec![true, true, false, false]);
}
#[test]
fn test_argsort_boolean_with_nulls() {
use minarrow::Bitmask;
let mut data = Bitmask::new_set_all(4, false);
data.set(0, true);
data.set(2, true);
let mut null_mask = Bitmask::new_set_all(4, true);
null_mask.set(1, false);
let indices = argsort_boolean(&data, Some(&null_mask), false);
assert_eq!(indices[0], 1); }
#[test]
fn test_argsort_auto_i32_basic() {
let data = [5i32, 2, 8, 1, 9];
let config = ArgsortConfig::default();
let indices = argsort_auto_i32(&data, &config);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
}
#[test]
fn test_argsort_auto_with_comparison() {
let data = [5i32, 2, 8, 1, 9];
let config = ArgsortConfig::new().algorithm(SortAlgorithm::Comparison);
let indices = argsort_auto_i32(&data, &config);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
}
#[test]
fn test_argsort_auto_descending() {
let data = [5i32, 2, 8, 1, 9];
let config = ArgsortConfig::new().descending(true);
let indices = argsort_auto_i32(&data, &config);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![9, 8, 5, 2, 1]);
}
fn apply_indices<T: Copy>(data: &[T], indices: &[usize]) -> Vec<T> {
indices.iter().map(|&i| data[i]).collect()
}
#[test]
fn test_i32_sorts_produce_same_results() {
let data = vec![
5i32,
-2,
8,
-1,
9,
3,
-7,
4,
6,
0,
i32::MAX,
i32::MIN,
0,
-100,
100,
1,
1,
1,
-1,
-1, ];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_i32(&data, false);
let auto_comparison = argsort_auto_i32(
&data,
&ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
);
let auto_radix =
argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
let auto_default = argsort_auto_i32(&data, &ArgsortConfig::default());
let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
let sorted_auto_comparison: Vec<i32> = apply_indices(&data, &auto_comparison);
let sorted_auto_radix: Vec<i32> = apply_indices(&data, &auto_radix);
let sorted_auto_default: Vec<i32> = apply_indices(&data, &auto_default);
assert_eq!(
sorted_comparison, sorted_radix,
"comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_comparison,
"comparison vs auto_comparison mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_radix,
"comparison vs auto_radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_default,
"comparison vs auto_default mismatch"
);
for i in 1..sorted_comparison.len() {
assert!(
sorted_comparison[i - 1] <= sorted_comparison[i],
"not sorted at index {}",
i
);
}
let comparison_desc = argsort(&data, true);
let radix_desc = argsort_radix_i32(&data, true);
let sorted_comparison_desc: Vec<i32> = apply_indices(&data, &comparison_desc);
let sorted_radix_desc: Vec<i32> = apply_indices(&data, &radix_desc);
assert_eq!(
sorted_comparison_desc, sorted_radix_desc,
"descending comparison vs radix mismatch"
);
for i in 1..sorted_comparison_desc.len() {
assert!(
sorted_comparison_desc[i - 1] >= sorted_comparison_desc[i],
"not sorted descending at index {}",
i
);
}
}
#[test]
fn test_i64_sorts_produce_same_results() {
let data = vec![
5i64,
-2,
8,
-1,
9,
3,
-7,
4,
6,
0,
i64::MAX,
i64::MIN,
0,
-100,
100,
i64::MAX - 1,
i64::MIN + 1,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_i64(&data, false);
let auto_comparison = argsort_auto_i64(
&data,
&ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
);
let auto_radix =
argsort_auto_i64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
let sorted_auto_comparison: Vec<i64> = apply_indices(&data, &auto_comparison);
let sorted_auto_radix: Vec<i64> = apply_indices(&data, &auto_radix);
assert_eq!(
sorted_comparison, sorted_radix,
"i64: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_comparison,
"i64: comparison vs auto_comparison mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_radix,
"i64: comparison vs auto_radix mismatch"
);
for i in 1..sorted_comparison.len() {
assert!(
sorted_comparison[i - 1] <= sorted_comparison[i],
"i64: not sorted at index {}",
i
);
}
}
#[test]
fn test_u32_sorts_produce_same_results() {
let data = vec![
5u32,
2,
8,
1,
9,
3,
7,
4,
6,
0,
u32::MAX,
0,
100,
u32::MAX - 1,
1,
1,
1, ];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_u32(&data, false);
let auto_comparison = argsort_auto_u32(
&data,
&ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
);
let auto_radix =
argsort_auto_u32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
let sorted_auto_comparison: Vec<u32> = apply_indices(&data, &auto_comparison);
let sorted_auto_radix: Vec<u32> = apply_indices(&data, &auto_radix);
assert_eq!(
sorted_comparison, sorted_radix,
"u32: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_comparison,
"u32: comparison vs auto_comparison mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_radix,
"u32: comparison vs auto_radix mismatch"
);
for i in 1..sorted_comparison.len() {
assert!(
sorted_comparison[i - 1] <= sorted_comparison[i],
"u32: not sorted at index {}",
i
);
}
}
#[test]
fn test_u64_sorts_produce_same_results() {
let data = vec![
5u64,
2,
8,
1,
9,
3,
7,
4,
6,
0,
u64::MAX,
0,
100,
u64::MAX - 1,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_u64(&data, false);
let auto_comparison = argsort_auto_u64(
&data,
&ArgsortConfig::new().algorithm(SortAlgorithm::Comparison),
);
let auto_radix =
argsort_auto_u64(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Radix));
let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
let sorted_auto_comparison: Vec<u64> = apply_indices(&data, &auto_comparison);
let sorted_auto_radix: Vec<u64> = apply_indices(&data, &auto_radix);
assert_eq!(
sorted_comparison, sorted_radix,
"u64: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_comparison,
"u64: comparison vs auto_comparison mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_radix,
"u64: comparison vs auto_radix mismatch"
);
for i in 1..sorted_comparison.len() {
assert!(
sorted_comparison[i - 1] <= sorted_comparison[i],
"u64: not sorted at index {}",
i
);
}
}
#[cfg(feature = "simd_sort")]
#[test]
fn test_i32_sorts_produce_same_results_with_simd() {
use super::simd_argsort;
let data = vec![
5i32,
-2,
8,
-1,
9,
3,
-7,
4,
6,
0,
i32::MAX,
i32::MIN,
0,
-100,
100,
1,
1,
1,
-1,
-1,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_i32(&data, false);
let simd_asc = simd_argsort::argsort_simd_i32(&data, false);
let auto_simd =
argsort_auto_i32(&data, &ArgsortConfig::new().algorithm(SortAlgorithm::Simd));
let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
let sorted_simd: Vec<i32> = apply_indices(&data, &simd_asc);
let sorted_auto_simd: Vec<i32> = apply_indices(&data, &auto_simd);
assert_eq!(
sorted_comparison, sorted_radix,
"simd test: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_simd,
"simd test: comparison vs simd mismatch"
);
assert_eq!(
sorted_comparison, sorted_auto_simd,
"simd test: comparison vs auto_simd mismatch"
);
}
#[cfg(feature = "simd_sort")]
#[test]
fn test_i64_sorts_produce_same_results_with_simd() {
use super::simd_argsort;
let data = vec![
5i64,
-2,
8,
-1,
9,
3,
-7,
4,
6,
0,
i64::MAX,
i64::MIN,
0,
-100,
100,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_i64(&data, false);
let simd_asc = simd_argsort::argsort_simd_i64(&data, false);
let sorted_comparison: Vec<i64> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<i64> = apply_indices(&data, &radix_asc);
let sorted_simd: Vec<i64> = apply_indices(&data, &simd_asc);
assert_eq!(
sorted_comparison, sorted_radix,
"simd i64: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_simd,
"simd i64: comparison vs simd mismatch"
);
}
#[cfg(feature = "simd_sort")]
#[test]
fn test_u32_sorts_produce_same_results_with_simd() {
use super::simd_argsort;
let data = vec![
5u32,
2,
8,
1,
9,
3,
7,
4,
6,
0,
u32::MAX,
0,
100,
u32::MAX - 1,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_u32(&data, false);
let simd_asc = simd_argsort::argsort_simd_u32(&data, false);
let sorted_comparison: Vec<u32> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<u32> = apply_indices(&data, &radix_asc);
let sorted_simd: Vec<u32> = apply_indices(&data, &simd_asc);
assert_eq!(
sorted_comparison, sorted_radix,
"simd u32: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_simd,
"simd u32: comparison vs simd mismatch"
);
}
#[cfg(feature = "simd_sort")]
#[test]
fn test_u64_sorts_produce_same_results_with_simd() {
use super::simd_argsort;
let data = vec![
5u64,
2,
8,
1,
9,
3,
7,
4,
6,
0,
u64::MAX,
0,
100,
u64::MAX - 1,
];
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_u64(&data, false);
let simd_asc = simd_argsort::argsort_simd_u64(&data, false);
let sorted_comparison: Vec<u64> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<u64> = apply_indices(&data, &radix_asc);
let sorted_simd: Vec<u64> = apply_indices(&data, &simd_asc);
assert_eq!(
sorted_comparison, sorted_radix,
"simd u64: comparison vs radix mismatch"
);
assert_eq!(
sorted_comparison, sorted_simd,
"simd u64: comparison vs simd mismatch"
);
}
#[test]
fn test_numeric_sorts_produce_same_results_large_dataset() {
fn lcg_next(state: &mut u64) -> i32 {
*state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
(*state >> 33) as i32
}
let mut state = 12345u64;
let data: Vec<i32> = (0..10_000).map(|_| lcg_next(&mut state)).collect();
let comparison_asc = argsort(&data, false);
let radix_asc = argsort_radix_i32(&data, false);
let sorted_comparison: Vec<i32> = apply_indices(&data, &comparison_asc);
let sorted_radix: Vec<i32> = apply_indices(&data, &radix_asc);
assert_eq!(
sorted_comparison, sorted_radix,
"large dataset: comparison vs radix mismatch"
);
for i in 1..sorted_comparison.len() {
assert!(
sorted_comparison[i - 1] <= sorted_comparison[i],
"large dataset: not sorted at index {}",
i
);
}
}
#[test]
fn test_stable_sort_preserves_relative_order() {
let data = [3i32, 1, 2, 1];
let indices = argsort_with_stability(&data, false, true);
assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
#[test]
fn test_stable_float_preserves_relative_order() {
let data = [3.0f64, 1.0, 2.0, 1.0];
let indices = argsort_float_with_stability(&data, false, true);
assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); assert_eq!(indices[2], 2); assert_eq!(indices[3], 0); }
#[test]
fn test_argsort_config_stable_default_true() {
let config = ArgsortConfig::default();
assert!(config.stable);
}
#[test]
fn test_argsort_auto_stable_preserves_order() {
let data = [3i32, 1, 2, 1];
let config = ArgsortConfig::new(); let indices = argsort_auto_i32(&data, &config);
assert_eq!(indices[0], 1); assert_eq!(indices[1], 3); }
#[test]
fn test_argsort_auto_unstable_still_sorts_correctly() {
let data = [5i32, 2, 8, 1, 9];
let config = ArgsortConfig::new().stable(false);
let indices = argsort_auto_i32(&data, &config);
let sorted: Vec<i32> = indices.iter().map(|&i| data[i]).collect();
assert_eq!(sorted, vec![1, 2, 5, 8, 9]);
}
}
}