simd_kernels/kernels/
sort.rs

1// Copyright Peter Bower 2025. All Rights Reserved.
2// Licensed under Mozilla Public License (MPL) 2.0.
3
4//! # **Sorting Algorithms Kernels Module** - *Array Sorting and Ordering Operations*
5//!
6//! Sorting kernels for ordering operations across Arrow-compatible 
7//! data types with null-aware semantics and optimised comparison strategies. Essential foundation
8//! for analytical operations requiring ordered data access and ranking computations.
9//!
10use std::cmp::Ordering;
11
12use minarrow::{
13    Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64,
14    traits::type_unions::Float,
15};
16use num_traits::{NumCast, Zero};
17
18/// Total ordering for f32/f64 as per IEEE 754
19///
20/// - NaN sorts greater than all numbers, including +inf.
21/// - -0.0 and +0.0 are distinct (we can change in future if needed - to filter first).
22/// - Preserves all edge case ordering.
23
24#[inline]
25pub fn sort_float<T: Float>(slice: &mut [T]) {
26    slice.sort_unstable_by(total_cmp_f);
27}
28/// TLDR: Instead of comparing the raw bit patterns directly (which places all negative‐sign bit values after the positives),
29/// we transform each bit pattern into a “sort key”:
30///     => If the sign bit is 1 (negative or negative‐NaN), we invert all bits: !bits.
31///     => Otherwise (sign bit 0), we flip only the sign bit: bits ^ 0x80…0.
32#[inline(always)]
33pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
34    // We reinterpret the bits of `T` as either u32 or u64:
35    if std::mem::size_of::<T>() == 4 {
36        // f32 path
37        let ua = unsafe { *(a as *const T as *const u32) };
38        let ub = unsafe { *(b as *const T as *const u32) };
39        // Negative values get inverted; non-negatives get their sign bit flipped.
40        let ka = if ua & 0x8000_0000 != 0 {
41            !ua
42        } else {
43            ua ^ 0x8000_0000
44        };
45        let kb = if ub & 0x8000_0000 != 0 {
46            !ub
47        } else {
48            ub ^ 0x8000_0000
49        };
50        ka.cmp(&kb)
51    } else if std::mem::size_of::<T>() == 8 {
52        // f64 path
53        let ua = unsafe { *(a as *const T as *const u64) };
54        let ub = unsafe { *(b as *const T as *const u64) };
55        let ka = if ua & 0x8000_0000_0000_0000 != 0 {
56            !ua
57        } else {
58            ua ^ 0x8000_0000_0000_0000
59        };
60        let kb = if ub & 0x8000_0000_0000_0000 != 0 {
61            !ub
62        } else {
63            ub ^ 0x8000_0000_0000_0000
64        };
65        ka.cmp(&kb)
66    } else {
67        unreachable!("Only f32 and f64 are valid Float types.")
68    }
69}
70
71/// Returns a newly sorted Vec64, leaving the original slice untouched.
72pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
73    let mut v = Vec64::from_slice(data);
74    sort_float(&mut v);
75    v
76}
77
78/// Performs in-place unstable sorting of integer slices with optimal performance.
79/// 
80/// High-performance sorting function optimised for integer types using unstable sort algorithms
81/// that prioritise speed over preserving the relative order of equal elements. 
82/// 
83/// # Type Parameters
84/// - `T`: Integer type implementing `Ord + Copy` (i8, i16, i32, i64, u8, u16, u32, u64, etc.)
85/// 
86/// # Parameters
87/// - `slice`: Mutable slice to be sorted in-place
88///
89/// # Usage Example
90/// ```rust,ignore
91/// use simd_kernels::kernels::sort::sort_int;
92/// 
93/// let mut data = [64i32, 34, 25, 12, 22, 11, 90];
94/// sort_int(&mut data);
95/// // data is now [11, 12, 22, 25, 34, 64, 90]
96/// ```
97#[inline]
98pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
99    slice.sort_unstable();
100}
101
102/// Creates a new sorted copy of integer data in a Vec64 container.
103/// 
104/// Clones input data into a Vec64 and sorts it using the optimised
105/// integer sorting algorithm. Returns a new sorted container while leaving the original data
106/// unchanged.
107/// 
108/// # Type Parameters
109/// - `T`: Integer type implementing `Ord + Copy` (i8, i16, i32, i64, u8, u16, u32, u64, etc.)
110/// 
111/// # Parameters
112/// - `data`: Source slice to be copied and sorted
113/// 
114/// # Returns
115/// A new `Vec64<T>` containing the sorted elements from the input slice.
116/// 
117/// # Usage Example
118/// ```rust,ignore
119/// use simd_kernels::kernels::sort::sorted_int;
120/// 
121/// let data = [64i32, 34, 25, 12, 22, 11, 90];
122/// let sorted = sorted_int(&data);
123/// // sorted contains [11, 12, 22, 25, 34, 64, 90]
124/// // original data unchanged
125/// ```
126pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
127    let mut v = Vec64::from_slice(data);
128    sort_int(&mut v);
129    v
130}
131
132/// Performs in-place unstable sorting of string slice references with lexicographic ordering.
133/// 
134/// High-performance sorting function for string references using unstable sort algorithms.
135/// Efficient lexicographic ordering for string processing, text analysis, and data organisation tasks.
136/// 
137/// # Parameters
138/// - `slice`: Mutable slice of string references to be sorted in-place
139/// 
140/// # Usage Example
141/// ```rust,ignore
142/// use simd_kernels::kernels::sort::sort_str;
143/// 
144/// let mut words = ["zebra", "apple", "banana", "cherry"];
145/// sort_str(&mut words);
146/// // words is now ["apple", "banana", "cherry", "zebra"]
147/// ```
148#[inline]
149pub fn sort_str(slice: &mut [&str]) {
150    slice.sort_unstable();
151}
152
153/// Creates a new sorted collection of owned strings from string references.
154/// 
155/// Copies string references into owned String objects within a Vec64
156/// container and sorts them lexicographically. Returns a new sorted collection while preserving
157/// the original string references for scenarios requiring owned string data.
158/// 
159/// # Parameters
160/// - `data`: Source slice of string references to be copied and sorted
161/// 
162/// # Returns
163/// A new `Vec64<String>` containing owned, sorted string copies from the input references.
164/// 
165/// # Memory Allocation
166/// - String allocation: Each string reference copied into a new String object
167/// - Container allocation: Vec64 provides 64-byte aligned storage for optimal performance
168/// - Heap usage: Total memory proportional to sum of string lengths plus container overhead
169/// 
170/// # Performance Characteristics
171/// - O(n) string allocation and copying (proportional to total string length)
172/// - O(n log n) sorting time complexity with string comparison overhead
173/// - Memory overhead: Requires additional heap space for all string content
174/// 
175/// # String Ownership
176/// - Input references: Original string references remain unchanged
177/// - Output strings: New owned String objects independent of input lifetime
178/// - Memory management: Vec64<String> handles automatic cleanup of owned strings
179/// 
180/// # Usage Example
181/// ```rust,ignore
182/// use simd_kernels::kernels::sort::sorted_str;
183/// 
184/// let words = ["zebra", "apple", "banana", "cherry"];
185/// let sorted = sorted_str(&words);
186/// // sorted contains owned Strings: ["apple", "banana", "cherry", "zebra"]
187/// // original words slice unchanged
188/// ```
189pub fn sorted_str(data: &[&str]) -> Vec64<String> {
190    let mut v = Vec64::from_slice(data);
191    sort_str(&mut v);
192    v.iter().map(|s| s.to_string()).collect()
193}
194
195/// For StringArray as contiguous utf8 buffer plus offsets.
196/// Assumes offsets + values as in minarrow StringArray.
197pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
198    let n = offsets.len() - 1;
199    let mut indices: Vec<usize> = (0..n).collect();
200
201    indices.sort_unstable_by(|&i, &j| {
202        let a = &values[offsets[i]..offsets[i + 1]];
203        let b = &values[offsets[j]..offsets[j + 1]];
204        a.cmp(b)
205    });
206
207    // Precompute total length for the output string buffer
208    let total_bytes: usize = indices
209        .iter()
210        .map(|&idx| offsets[idx + 1] - offsets[idx])
211        .sum();
212
213    // Preallocate buffers
214    let mut new_values = String::with_capacity(total_bytes);
215    let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
216
217    // Pre-extend buffers for unchecked writes
218    unsafe {
219        new_offsets.set_len(n + 1);
220    }
221    unsafe {
222        new_values.as_mut_vec().set_len(total_bytes);
223    }
224
225    let values_bytes = values.as_bytes();
226    let out_bytes = unsafe { new_values.as_mut_vec() };
227
228    // First offset is always 0
229    unsafe {
230        *new_offsets.get_unchecked_mut(0) = 0;
231    }
232    let mut current_offset = 0;
233    let mut out_ptr = 0;
234    for (i, &idx) in indices.iter().enumerate() {
235        let start = offsets[idx];
236        let end = offsets[idx + 1];
237        let len = end - start;
238        // Copy string bytes
239        unsafe {
240            std::ptr::copy_nonoverlapping(
241                values_bytes.as_ptr().add(start),
242                out_bytes.as_mut_ptr().add(out_ptr),
243                len,
244            );
245            current_offset += len;
246            *new_offsets.get_unchecked_mut(i + 1) = current_offset;
247        }
248        out_ptr += len;
249    }
250    // SAFETY: We filled up to `total_bytes`
251    unsafe {
252        new_values.as_mut_vec().set_len(current_offset);
253    }
254
255    (new_offsets, new_values)
256}
257
258/// Sorts a CategoricalArray lexically by its unique values, returning new indices and mask.
259/// The category dictionary is preserved. Nulls sort first.
260pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
261    cat: &CategoricalArray<T>,
262) -> (Vec64<T>, Option<Bitmask>) {
263    let len = cat.data.len();
264    let mut indices: Vec<usize> = (0..len).collect();
265
266    // Sort indices: nulls first, then by value, stable.
267    indices.sort_by(|&i, &j| {
268        let a_is_null = cat.is_null(i);
269        let b_is_null = cat.is_null(j);
270        match (a_is_null, b_is_null) {
271            (true, true) => Ordering::Equal,
272            (true, false) => Ordering::Less,
273            (false, true) => Ordering::Greater,
274            (false, false) => {
275                // compare by the string value of the keys
276                let a_key = &cat.unique_values[cat.data[i].to_usize()];
277                let b_key = &cat.unique_values[cat.data[j].to_usize()];
278                a_key.cmp(b_key)
279            }
280        }
281    });
282
283    // Build output data and mask
284    let mut sorted_data = Vec64::<T>::with_capacity(len);
285    let mut sorted_mask = cat
286        .null_mask
287        .as_ref()
288        .map(|_| Bitmask::new_set_all(len, false));
289
290    for (out_i, &orig_i) in indices.iter().enumerate() {
291        sorted_data.push(cat.data[orig_i]);
292        if let Some(ref mask) = cat.null_mask {
293            if let Some(ref mut sm) = sorted_mask {
294                if unsafe { mask.get_unchecked(orig_i) } {
295                    unsafe { sm.set_unchecked(out_i, true) };
296                }
297            }
298        }
299    }
300
301    (sorted_data, sorted_mask)
302}
303
304/// Unpacks a Bitmask into a Vec<bool>
305#[inline]
306fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
307    let mut out = Vec64::with_capacity(mask.len);
308    for i in 0..mask.len {
309        out.push(unsafe { mask.get_unchecked(i) });
310    }
311    out
312}
313
314/// Packs a Vec<bool> into a Bitmask
315#[inline]
316fn pack_bitmask(bits: &[bool]) -> Bitmask {
317    let mut mask = Bitmask::new_set_all(bits.len(), false);
318    for (i, &b) in bits.iter().enumerate() {
319        if b {
320            unsafe { mask.set_unchecked(i, true) };
321        }
322    }
323    mask
324}
325
326/// Sorts a BooleanArray in-place by value: all false first, then true.
327/// Nulls sort first if present.
328pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
329    let bits: Vec64<bool> = unpack_bitmask(&arr.data);
330    let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
331
332    let mut indices: Vec<usize> = (0..arr.len).collect();
333
334    // Nulls sort first (is_null = !is_valid)
335    indices.sort_unstable_by(|&i, &j| {
336        let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
337        let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
338
339        match (a_null, b_null) {
340            (true, true) => Ordering::Equal,    // both null
341            (true, false) => Ordering::Less,    // null < non-null
342            (false, true) => Ordering::Greater, // non-null > null
343            (false, false) => {
344                // Both non-null: false < true
345                match (bits[i], bits[j]) {
346                    (false, false) => Ordering::Equal,
347                    (false, true) => Ordering::Less,
348                    (true, false) => Ordering::Greater,
349                    (true, true) => Ordering::Equal,
350                }
351            }
352        }
353    });
354
355    // Re-pack sorted logical values, forcing value=false for null slots
356    let sorted_bits: Vec<bool> = indices
357        .iter()
358        .map(|&idx| {
359            let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
360            if is_null { false } else { bits[idx] }
361        })
362        .collect();
363    arr.data = pack_bitmask(&sorted_bits);
364
365    // Re-pack the (optional) null mask.
366    if let Some(null_mask) = arr.null_mask.as_mut() {
367        let sorted_nulls: Vec<bool> = indices
368            .iter()
369            .map(|&idx| nulls.as_ref().unwrap()[idx])
370            .collect();
371        *null_mask = pack_bitmask(&sorted_nulls);
372    }
373}
374
375/// Sorts array data and applies the same permutation to the null mask.
376pub fn sort_slice_with_mask<T: Ord + Copy>(
377    data: &[T],
378    mask: Option<&Bitmask>,
379) -> (Vec64<T>, Option<Bitmask>) {
380    let n = data.len();
381    let mut indices: Vec<usize> = (0..n).collect();
382    indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
383
384    let mut sorted_data = Vec64::<T>::with_capacity(n);
385    unsafe {
386        sorted_data.set_len(n);
387    }
388    for (i, &idx) in indices.iter().enumerate() {
389        unsafe {
390            *sorted_data.get_unchecked_mut(i) = data[idx];
391        }
392    }
393
394    let sorted_mask = mask.map(|m| {
395        let mut out = Bitmask::new_set_all(n, false);
396        for (i, &idx) in indices.iter().enumerate() {
397            if unsafe { m.get_unchecked(idx) } {
398                unsafe { out.set_unchecked(i, true) };
399            }
400        }
401        out
402    });
403
404    (sorted_data, sorted_mask)
405}
406
407#[cfg(test)]
408mod tests {
409    use minarrow::vec64;
410
411    use super::*;
412
413    #[test]
414    fn test_total_cmp_f32_ordering_basic() {
415        let a = 1.0f32;
416        let b = 2.0f32;
417        assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
418        assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
419        assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
420    }
421
422    #[test]
423    fn test_total_cmp_f64_ordering_basic() {
424        let a = 1.0f64;
425        let b = 2.0f64;
426        assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
427        assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
428        assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
429    }
430
431    #[test]
432    fn test_total_cmp_zero_and_negzero() {
433        let pz = 0.0f32;
434        let nz = -0.0f32;
435        // -0.0 should not equal 0.0 in bitwise comparison
436        assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
437        assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
438    }
439
440    #[test]
441    fn test_total_cmp_nan_ordering_f32() {
442        let nan = f32::NAN;
443        let pos_inf = f32::INFINITY;
444        let neg_inf = f32::NEG_INFINITY;
445        let one = 1.0f32;
446
447        // NaN is greater than everything in this ordering
448        assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
449        assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
450        assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
451        // Self-equality
452        assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
453    }
454
455    #[test]
456    fn test_total_cmp_nan_ordering_f64() {
457        let nan = f64::NAN;
458        let pos_inf = f64::INFINITY;
459        let neg_inf = f64::NEG_INFINITY;
460        let one = 1.0f64;
461
462        assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
463        assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
464        assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
465        assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
466    }
467
468    #[test]
469    fn test_sort_float_f32_all_edge_cases() {
470        let mut v = vec64![
471            3.0f32,
472            -0.0,
473            0.0,
474            f32::INFINITY,
475            f32::NEG_INFINITY,
476            1.0,
477            -1.0,
478            f32::NAN,
479            2.0,
480            -2.0,
481        ];
482        sort_float(&mut v);
483        // Sorted by bit-pattern, not by value
484        // -2.0 < -1.0 < -0.0 < 0.0 < 1.0 < 2.0 < 3.0 < INF < NAN
485        assert_eq!(v[0], f32::NEG_INFINITY);
486        assert_eq!(v[1], -2.0);
487        assert_eq!(v[2], -1.0);
488        assert_eq!(v[3], -0.0);
489        assert_eq!(v[4], 0.0);
490        assert_eq!(v[5], 1.0);
491        assert_eq!(v[6], 2.0);
492        assert_eq!(v[7], 3.0);
493        assert_eq!(v[8], f32::INFINITY);
494        assert!(v[9].is_nan());
495    }
496
497    #[test]
498    fn test_sort_float_f64_all_edge_cases() {
499        let mut v = vec64![
500            3.0f64,
501            -0.0,
502            0.0,
503            f64::INFINITY,
504            f64::NEG_INFINITY,
505            1.0,
506            -1.0,
507            f64::NAN,
508            2.0,
509            -2.0,
510        ];
511        sort_float(&mut v);
512        assert_eq!(v[0], f64::NEG_INFINITY);
513        assert_eq!(v[1], -2.0);
514        assert_eq!(v[2], -1.0);
515        assert_eq!(v[3], -0.0);
516        assert_eq!(v[4], 0.0);
517        assert_eq!(v[5], 1.0);
518        assert_eq!(v[6], 2.0);
519        assert_eq!(v[7], 3.0);
520        assert_eq!(v[8], f64::INFINITY);
521        assert!(v[9].is_nan());
522    }
523
524    #[test]
525    fn test_sorted_float_immutability_and_return_type() {
526        let v = vec64![1.0f32, 2.0, 3.0];
527        let out = sorted_float(&v);
528        assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
529        // Ensure original is unchanged
530        assert_eq!(*v, [1.0, 2.0, 3.0]);
531    }
532
533    #[test]
534    fn test_sorted_float_correct_for_f64() {
535        let v = vec64![3.0f64, 2.0, 1.0];
536        let out = sorted_float(&v);
537        assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
538    }
539
540    #[test]
541    fn test_sort_float_empty_and_single() {
542        let mut v: [f32; 0] = [];
543        sort_float(&mut v);
544        let mut v2 = [42.0f32];
545        sort_float(&mut v2);
546        assert_eq!(v2, [42.0]);
547    }
548
549    #[cfg(test)]
550    mod tests {
551        use super::*;
552        use minarrow::{Vec64, vec64};
553
554        #[test]
555        fn test_sort_int_empty_and_single() {
556            let mut arr: [i32; 0] = [];
557            sort_int(&mut arr);
558            assert_eq!(arr, [] as [i32; 0]);
559            let mut arr2 = vec64![42];
560            sort_int(&mut arr2);
561            assert_eq!(*arr2, [42]);
562        }
563
564        #[test]
565        fn test_sort_int_order() {
566            let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
567            sort_int(&mut arr);
568            assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
569        }
570
571        #[test]
572        fn test_sorted_int_immutability_and_output() {
573            let arr = vec64![5, 3, 7, 1, 2];
574            let sorted = sorted_int(&arr);
575            assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
576            // ensure input not changed
577            assert_eq!(*arr, [5, 3, 7, 1, 2]);
578        }
579
580        #[test]
581        fn test_sort_str_basic() {
582            let mut arr = vec64!["z", "b", "a", "d"];
583            sort_str(&mut arr);
584            assert_eq!(*arr, ["a", "b", "d", "z"]);
585        }
586
587        #[test]
588        fn test_sorted_str_and_non_ascii() {
589            let arr = vec64!["z", "ä", "b", "a", "d"];
590            let sorted = sorted_str(&arr);
591            assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); // note: utf8, ä > z in Rust
592            // input is untouched
593            assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
594        }
595
596        #[test]
597        fn test_sort_string_array_basic() {
598            let offsets = vec![0, 1, 3, 5, 5, 6]; // ["a", "bc", "de", "", "f"]
599            let values = "abcde".to_string() + "f";
600            let (new_offsets, new_values) = sort_string_array(&offsets, &values);
601            // Sorted order: "", "a", "bc", "de", "f"
602            let slices: Vec<_> = new_offsets
603                .windows(2)
604                .map(|w| &new_values[w[0]..w[1]])
605                .collect();
606            assert_eq!(slices, &["", "a", "bc", "de", "f"]);
607        }
608
609        #[test]
610        fn test_sort_categorical_lexical_basic_and_nulls() {
611            // Categories: 0 = "apple", 1 = "banana", 2 = "pear"
612            let unique = Vec64::from_slice_clone(&[
613                "apple".to_string(),
614                "banana".to_string(),
615                "pear".to_string(),
616            ]);
617            let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
618            let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
619            let cat = CategoricalArray {
620                data: data.into(),
621                unique_values: unique,
622                null_mask: Some(mask.clone()),
623            };
624            let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
625
626            // Nulls first
627            let mask_out = sorted_mask.unwrap();
628            let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
629            assert_eq!(null_pos, &[0, 1]);
630
631            // Remaining valid values
632            let sorted_as_str: Vec<_> = sorted_data
633                .iter()
634                .map(|&k| &cat.unique_values[k.to_usize()][..])
635                .collect();
636            let vals = &sorted_as_str[null_pos.len()..];
637            assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
638        }
639
640        #[test]
641        fn test_sort_categorical_all_nulls_and_no_nulls() {
642            // All null
643            let unique = Vec64::from_slice_clone(&["x".to_string()]);
644            let data = Vec64::from_slice(&[0u8, 0, 0]);
645            let mask = Bitmask::from_bools(&[false, false, false]);
646            let cat = CategoricalArray {
647                data: data.clone().into(),
648                unique_values: unique.clone(),
649                null_mask: Some(mask.clone()),
650            };
651            let (_, sorted_mask) = sort_categorical_lexical(&cat);
652            assert_eq!(
653                unpack_bitmask(&sorted_mask.unwrap()),
654                vec64![false, false, false]
655            );
656            // No nulls
657            let mask2 = Bitmask::from_bools(&[true, true, true]);
658            let cat2 = CategoricalArray {
659                data: data.clone().into(),
660                unique_values: unique,
661                null_mask: Some(mask2.clone()),
662            };
663            let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
664            assert_eq!(
665                unpack_bitmask(&sorted_mask2.unwrap()),
666                vec64![true, true, true]
667            );
668        }
669        #[test]
670        fn test_sort_boolean_array_with_nulls() {
671            let mut arr = BooleanArray {
672                data: pack_bitmask(&[false, true, false, true, true, false]),
673                null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
674                len: 6,
675                _phantom: std::marker::PhantomData,
676            };
677            sort_boolean_array(&mut arr);
678            // Nulls first (mask false)
679            let bits = unpack_bitmask(&arr.data);
680            let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
681            assert_eq!(nulls, vec64![false, false, true, true, true, true]);
682            // Sorted data for valid (true): all false first, then true
683            assert_eq!(&bits[2..], [false, false, false, true]);
684        }
685
686        #[test]
687        fn test_sort_boolean_array_all_true_and_all_false() {
688            let mut arr = BooleanArray {
689                data: pack_bitmask(&[true, true, true]),
690                null_mask: None,
691                len: 3,
692                _phantom: std::marker::PhantomData,
693            };
694            sort_boolean_array(&mut arr);
695            assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
696
697            let mut arr2 = BooleanArray {
698                data: pack_bitmask(&[false, false, false]),
699                null_mask: None,
700                len: 3,
701                _phantom: std::marker::PhantomData,
702            };
703            sort_boolean_array(&mut arr2);
704            assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
705        }
706
707        #[test]
708        fn test_sort_boolean_array_all_nulls_and_none() {
709            let mut arr = BooleanArray {
710                data: pack_bitmask(&[true, false, true]),
711                null_mask: Some(pack_bitmask(&[false, false, false])),
712                len: 3,
713                _phantom: std::marker::PhantomData,
714            };
715            sort_boolean_array(&mut arr);
716            assert_eq!(
717                unpack_bitmask(arr.null_mask.as_ref().unwrap()),
718                vec64![false, false, false]
719            );
720        }
721
722        #[test]
723        fn test_sort_slice_with_mask_basic() {
724            let data = vec64![3, 1, 2, 1];
725            let mask = pack_bitmask(&[true, false, true, true]);
726            let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
727            assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
728            assert_eq!(
729                unpack_bitmask(&mask_out.unwrap()),
730                vec64![false, true, true, true]
731            );
732        }
733
734        #[test]
735        fn test_sort_slice_with_mask_no_mask() {
736            let data = vec64![3, 2, 1, 1, 0];
737            let (sorted, mask_out) = sort_slice_with_mask(&data, None);
738            assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
739            assert!(mask_out.is_none());
740        }
741
742        #[test]
743        fn test_sort_slice_with_mask_all_true_and_all_false() {
744            let data = vec64![3, 2, 1, 0];
745            let mask_true = pack_bitmask(&[true; 4]);
746            let mask_false = pack_bitmask(&[false; 4]);
747            let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
748            let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
749            assert_eq!(
750                unpack_bitmask(&mask_true_out.unwrap()),
751                vec64![true, true, true, true]
752            );
753            assert_eq!(
754                unpack_bitmask(&mask_false_out.unwrap()),
755                vec64![false, false, false, false]
756            );
757        }
758
759        #[test]
760        fn test_sort_int_with_duplicates_and_negatives() {
761            let mut arr = vec64![-10, -1, 5, 0, 5, -10];
762            sort_int(&mut arr);
763            assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
764        }
765
766        #[test]
767        fn test_sort_str_empty_and_duplicate() {
768            let mut arr = vec64!["", "a", "b", "a", ""];
769            sort_str(&mut arr);
770            assert_eq!(*arr, ["", "", "a", "a", "b"]);
771        }
772    }
773}