simd_kernels/kernels/
sort.rs

1// Copyright Peter Bower 2025. All Rights Reserved.
2// Licensed under Mozilla Public License (MPL) 2.0.
3
4//! # **Sorting Algorithms Kernels Module** - *Array Sorting and Ordering Operations*
5//!
6//! Sorting kernels for ordering operations across Arrow-compatible
7//! data types with null-aware semantics and optimised comparison strategies. Essential foundation
8//! for analytical operations requiring ordered data access and ranking computations.
9//!
10use std::cmp::Ordering;
11
12use minarrow::{Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64};
13use num_traits::{Float, NumCast, Zero};
14
15/// Total ordering for f32/f64 as per IEEE 754
16///
17/// - NaN sorts greater than all numbers, including +inf.
18/// - -0.0 and +0.0 are distinct (we can change in future if needed - to filter first).
19/// - Preserves all edge case ordering.
20
21#[inline]
22pub fn sort_float<T: Float>(slice: &mut [T]) {
23    slice.sort_unstable_by(total_cmp_f);
24}
25/// TLDR: Instead of comparing the raw bit patterns directly (which places all negative‐sign bit values after the positives),
26/// we transform each bit pattern into a “sort key”:
27///     => If the sign bit is 1 (negative or negative‐NaN), we invert all bits: !bits.
28///     => Otherwise (sign bit 0), we flip only the sign bit: bits ^ 0x80…0.
29#[inline(always)]
30pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
31    // We reinterpret the bits of `T` as either u32 or u64:
32    if std::mem::size_of::<T>() == 4 {
33        // f32 path
34        let ua = unsafe { *(a as *const T as *const u32) };
35        let ub = unsafe { *(b as *const T as *const u32) };
36        // Negative values get inverted; non-negatives get their sign bit flipped.
37        let ka = if ua & 0x8000_0000 != 0 {
38            !ua
39        } else {
40            ua ^ 0x8000_0000
41        };
42        let kb = if ub & 0x8000_0000 != 0 {
43            !ub
44        } else {
45            ub ^ 0x8000_0000
46        };
47        ka.cmp(&kb)
48    } else if std::mem::size_of::<T>() == 8 {
49        // f64 path
50        let ua = unsafe { *(a as *const T as *const u64) };
51        let ub = unsafe { *(b as *const T as *const u64) };
52        let ka = if ua & 0x8000_0000_0000_0000 != 0 {
53            !ua
54        } else {
55            ua ^ 0x8000_0000_0000_0000
56        };
57        let kb = if ub & 0x8000_0000_0000_0000 != 0 {
58            !ub
59        } else {
60            ub ^ 0x8000_0000_0000_0000
61        };
62        ka.cmp(&kb)
63    } else {
64        unreachable!("Only f32 and f64 are valid Float types.")
65    }
66}
67
68/// Returns a newly sorted Vec64, leaving the original slice untouched.
69pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
70    let mut v = Vec64::from_slice(data);
71    sort_float(&mut v);
72    v
73}
74
75/// Performs in-place unstable sorting of integer slices with optimal performance.
76///
77/// High-performance sorting function optimised for integer types using unstable sort algorithms
78/// that prioritise speed over preserving the relative order of equal elements.
79///
80/// # Type Parameters
81/// - `T`: Integer type implementing `Ord + Copy` (i8, i16, i32, i64, u8, u16, u32, u64, etc.)
82///
83/// # Parameters
84/// - `slice`: Mutable slice to be sorted in-place
85///
86/// # Usage Example
87/// ```rust,ignore
88/// use simd_kernels::kernels::sort::sort_int;
89///
90/// let mut data = [64i32, 34, 25, 12, 22, 11, 90];
91/// sort_int(&mut data);
92/// // data is now [11, 12, 22, 25, 34, 64, 90]
93/// ```
94#[inline]
95pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
96    slice.sort_unstable();
97}
98
99/// Creates a new sorted copy of integer data in a Vec64 container.
100///
101/// Clones input data into a Vec64 and sorts it using the optimised
102/// integer sorting algorithm. Returns a new sorted container while leaving the original data
103/// unchanged.
104///
105/// # Type Parameters
106/// - `T`: Integer type implementing `Ord + Copy` (i8, i16, i32, i64, u8, u16, u32, u64, etc.)
107///
108/// # Parameters
109/// - `data`: Source slice to be copied and sorted
110///
111/// # Returns
112/// A new `Vec64<T>` containing the sorted elements from the input slice.
113///
114/// # Usage Example
115/// ```rust,ignore
116/// use simd_kernels::kernels::sort::sorted_int;
117///
118/// let data = [64i32, 34, 25, 12, 22, 11, 90];
119/// let sorted = sorted_int(&data);
120/// // sorted contains [11, 12, 22, 25, 34, 64, 90]
121/// // original data unchanged
122/// ```
123pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
124    let mut v = Vec64::from_slice(data);
125    sort_int(&mut v);
126    v
127}
128
129/// Performs in-place unstable sorting of string slice references with lexicographic ordering.
130///
131/// High-performance sorting function for string references using unstable sort algorithms.
132/// Efficient lexicographic ordering for string processing, text analysis, and data organisation tasks.
133///
134/// # Parameters
135/// - `slice`: Mutable slice of string references to be sorted in-place
136///
137/// # Usage Example
138/// ```rust,ignore
139/// use simd_kernels::kernels::sort::sort_str;
140///
141/// let mut words = ["zebra", "apple", "banana", "cherry"];
142/// sort_str(&mut words);
143/// // words is now ["apple", "banana", "cherry", "zebra"]
144/// ```
145#[inline]
146pub fn sort_str(slice: &mut [&str]) {
147    slice.sort_unstable();
148}
149
150/// Creates a new sorted collection of owned strings from string references.
151///
152/// Copies string references into owned String objects within a Vec64
153/// container and sorts them lexicographically. Returns a new sorted collection while preserving
154/// the original string references for scenarios requiring owned string data.
155///
156/// # Parameters
157/// - `data`: Source slice of string references to be copied and sorted
158///
159/// # Returns
160/// A new `Vec64<String>` containing owned, sorted string copies from the input references.
161///
162/// # Memory Allocation
163/// - String allocation: Each string reference copied into a new String object
164/// - Container allocation: Vec64 provides 64-byte aligned storage for optimal performance
165/// - Heap usage: Total memory proportional to sum of string lengths plus container overhead
166///
167/// # Performance Characteristics
168/// - O(n) string allocation and copying (proportional to total string length)
169/// - O(n log n) sorting time complexity with string comparison overhead
170/// - Memory overhead: Requires additional heap space for all string content
171///
172/// # String Ownership
173/// - Input references: Original string references remain unchanged
174/// - Output strings: New owned String objects independent of input lifetime
175/// - Memory management: Vec64<String> handles automatic cleanup of owned strings
176///
177/// # Usage Example
178/// ```rust,ignore
179/// use simd_kernels::kernels::sort::sorted_str;
180///
181/// let words = ["zebra", "apple", "banana", "cherry"];
182/// let sorted = sorted_str(&words);
183/// // sorted contains owned Strings: ["apple", "banana", "cherry", "zebra"]
184/// // original words slice unchanged
185/// ```
186pub fn sorted_str(data: &[&str]) -> Vec64<String> {
187    let mut v = Vec64::from_slice(data);
188    sort_str(&mut v);
189    v.iter().map(|s| s.to_string()).collect()
190}
191
192/// For StringArray as contiguous utf8 buffer plus offsets.
193/// Assumes offsets + values as in minarrow StringArray.
194pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
195    let n = offsets.len() - 1;
196    let mut indices: Vec<usize> = (0..n).collect();
197
198    indices.sort_unstable_by(|&i, &j| {
199        let a = &values[offsets[i]..offsets[i + 1]];
200        let b = &values[offsets[j]..offsets[j + 1]];
201        a.cmp(b)
202    });
203
204    // Precompute total length for the output string buffer
205    let total_bytes: usize = indices
206        .iter()
207        .map(|&idx| offsets[idx + 1] - offsets[idx])
208        .sum();
209
210    // Preallocate buffers
211    let mut new_values = String::with_capacity(total_bytes);
212    let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
213
214    // Pre-extend buffers for unchecked writes
215    unsafe {
216        new_offsets.set_len(n + 1);
217    }
218    unsafe {
219        new_values.as_mut_vec().set_len(total_bytes);
220    }
221
222    let values_bytes = values.as_bytes();
223    let out_bytes = unsafe { new_values.as_mut_vec() };
224
225    // First offset is always 0
226    unsafe {
227        *new_offsets.get_unchecked_mut(0) = 0;
228    }
229    let mut current_offset = 0;
230    let mut out_ptr = 0;
231    for (i, &idx) in indices.iter().enumerate() {
232        let start = offsets[idx];
233        let end = offsets[idx + 1];
234        let len = end - start;
235        // Copy string bytes
236        unsafe {
237            std::ptr::copy_nonoverlapping(
238                values_bytes.as_ptr().add(start),
239                out_bytes.as_mut_ptr().add(out_ptr),
240                len,
241            );
242            current_offset += len;
243            *new_offsets.get_unchecked_mut(i + 1) = current_offset;
244        }
245        out_ptr += len;
246    }
247    // SAFETY: We filled up to `total_bytes`
248    unsafe {
249        new_values.as_mut_vec().set_len(current_offset);
250    }
251
252    (new_offsets, new_values)
253}
254
255/// Sorts a CategoricalArray lexically by its unique values, returning new indices and mask.
256/// The category dictionary is preserved. Nulls sort first.
257pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
258    cat: &CategoricalArray<T>,
259) -> (Vec64<T>, Option<Bitmask>) {
260    let len = cat.data.len();
261    let mut indices: Vec<usize> = (0..len).collect();
262
263    // Sort indices: nulls first, then by value, stable.
264    indices.sort_by(|&i, &j| {
265        let a_is_null = cat.is_null(i);
266        let b_is_null = cat.is_null(j);
267        match (a_is_null, b_is_null) {
268            (true, true) => Ordering::Equal,
269            (true, false) => Ordering::Less,
270            (false, true) => Ordering::Greater,
271            (false, false) => {
272                // compare by the string value of the keys
273                let a_key = &cat.unique_values[cat.data[i].to_usize()];
274                let b_key = &cat.unique_values[cat.data[j].to_usize()];
275                a_key.cmp(b_key)
276            }
277        }
278    });
279
280    // Build output data and mask
281    let mut sorted_data = Vec64::<T>::with_capacity(len);
282    let mut sorted_mask = cat
283        .null_mask
284        .as_ref()
285        .map(|_| Bitmask::new_set_all(len, false));
286
287    for (out_i, &orig_i) in indices.iter().enumerate() {
288        sorted_data.push(cat.data[orig_i]);
289        if let Some(ref mask) = cat.null_mask {
290            if let Some(ref mut sm) = sorted_mask {
291                if unsafe { mask.get_unchecked(orig_i) } {
292                    unsafe { sm.set_unchecked(out_i, true) };
293                }
294            }
295        }
296    }
297
298    (sorted_data, sorted_mask)
299}
300
301/// Unpacks a Bitmask into a Vec<bool>
302#[inline]
303fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
304    let mut out = Vec64::with_capacity(mask.len);
305    for i in 0..mask.len {
306        out.push(unsafe { mask.get_unchecked(i) });
307    }
308    out
309}
310
311/// Packs a Vec<bool> into a Bitmask
312#[inline]
313fn pack_bitmask(bits: &[bool]) -> Bitmask {
314    let mut mask = Bitmask::new_set_all(bits.len(), false);
315    for (i, &b) in bits.iter().enumerate() {
316        if b {
317            unsafe { mask.set_unchecked(i, true) };
318        }
319    }
320    mask
321}
322
323/// Sorts a BooleanArray in-place by value: all false first, then true.
324/// Nulls sort first if present.
325pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
326    let bits: Vec64<bool> = unpack_bitmask(&arr.data);
327    let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
328
329    let mut indices: Vec<usize> = (0..arr.len).collect();
330
331    // Nulls sort first (is_null = !is_valid)
332    indices.sort_unstable_by(|&i, &j| {
333        let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
334        let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
335
336        match (a_null, b_null) {
337            (true, true) => Ordering::Equal,    // both null
338            (true, false) => Ordering::Less,    // null < non-null
339            (false, true) => Ordering::Greater, // non-null > null
340            (false, false) => {
341                // Both non-null: false < true
342                match (bits[i], bits[j]) {
343                    (false, false) => Ordering::Equal,
344                    (false, true) => Ordering::Less,
345                    (true, false) => Ordering::Greater,
346                    (true, true) => Ordering::Equal,
347                }
348            }
349        }
350    });
351
352    // Re-pack sorted logical values, forcing value=false for null slots
353    let sorted_bits: Vec<bool> = indices
354        .iter()
355        .map(|&idx| {
356            let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
357            if is_null { false } else { bits[idx] }
358        })
359        .collect();
360    arr.data = pack_bitmask(&sorted_bits);
361
362    // Re-pack the (optional) null mask.
363    if let Some(null_mask) = arr.null_mask.as_mut() {
364        let sorted_nulls: Vec<bool> = indices
365            .iter()
366            .map(|&idx| nulls.as_ref().unwrap()[idx])
367            .collect();
368        *null_mask = pack_bitmask(&sorted_nulls);
369    }
370}
371
372/// Sorts array data and applies the same permutation to the null mask.
373pub fn sort_slice_with_mask<T: Ord + Copy>(
374    data: &[T],
375    mask: Option<&Bitmask>,
376) -> (Vec64<T>, Option<Bitmask>) {
377    let n = data.len();
378    let mut indices: Vec<usize> = (0..n).collect();
379    indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
380
381    let mut sorted_data = Vec64::<T>::with_capacity(n);
382    unsafe {
383        sorted_data.set_len(n);
384    }
385    for (i, &idx) in indices.iter().enumerate() {
386        unsafe {
387            *sorted_data.get_unchecked_mut(i) = data[idx];
388        }
389    }
390
391    let sorted_mask = mask.map(|m| {
392        let mut out = Bitmask::new_set_all(n, false);
393        for (i, &idx) in indices.iter().enumerate() {
394            if unsafe { m.get_unchecked(idx) } {
395                unsafe { out.set_unchecked(i, true) };
396            }
397        }
398        out
399    });
400
401    (sorted_data, sorted_mask)
402}
403
404#[cfg(test)]
405mod tests {
406    use minarrow::vec64;
407
408    use super::*;
409
410    #[test]
411    fn test_total_cmp_f32_ordering_basic() {
412        let a = 1.0f32;
413        let b = 2.0f32;
414        assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
415        assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
416        assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
417    }
418
419    #[test]
420    fn test_total_cmp_f64_ordering_basic() {
421        let a = 1.0f64;
422        let b = 2.0f64;
423        assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
424        assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
425        assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
426    }
427
428    #[test]
429    fn test_total_cmp_zero_and_negzero() {
430        let pz = 0.0f32;
431        let nz = -0.0f32;
432        // -0.0 should not equal 0.0 in bitwise comparison
433        assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
434        assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
435    }
436
437    #[test]
438    fn test_total_cmp_nan_ordering_f32() {
439        let nan = f32::NAN;
440        let pos_inf = f32::INFINITY;
441        let neg_inf = f32::NEG_INFINITY;
442        let one = 1.0f32;
443
444        // NaN is greater than everything in this ordering
445        assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
446        assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
447        assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
448        // Self-equality
449        assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
450    }
451
452    #[test]
453    fn test_total_cmp_nan_ordering_f64() {
454        let nan = f64::NAN;
455        let pos_inf = f64::INFINITY;
456        let neg_inf = f64::NEG_INFINITY;
457        let one = 1.0f64;
458
459        assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
460        assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
461        assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
462        assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
463    }
464
465    #[test]
466    fn test_sort_float_f32_all_edge_cases() {
467        let mut v = vec64![
468            3.0f32,
469            -0.0,
470            0.0,
471            f32::INFINITY,
472            f32::NEG_INFINITY,
473            1.0,
474            -1.0,
475            f32::NAN,
476            2.0,
477            -2.0,
478        ];
479        sort_float(&mut v);
480        // Sorted by bit-pattern, not by value
481        // -2.0 < -1.0 < -0.0 < 0.0 < 1.0 < 2.0 < 3.0 < INF < NAN
482        assert_eq!(v[0], f32::NEG_INFINITY);
483        assert_eq!(v[1], -2.0);
484        assert_eq!(v[2], -1.0);
485        assert_eq!(v[3], -0.0);
486        assert_eq!(v[4], 0.0);
487        assert_eq!(v[5], 1.0);
488        assert_eq!(v[6], 2.0);
489        assert_eq!(v[7], 3.0);
490        assert_eq!(v[8], f32::INFINITY);
491        assert!(v[9].is_nan());
492    }
493
494    #[test]
495    fn test_sort_float_f64_all_edge_cases() {
496        let mut v = vec64![
497            3.0f64,
498            -0.0,
499            0.0,
500            f64::INFINITY,
501            f64::NEG_INFINITY,
502            1.0,
503            -1.0,
504            f64::NAN,
505            2.0,
506            -2.0,
507        ];
508        sort_float(&mut v);
509        assert_eq!(v[0], f64::NEG_INFINITY);
510        assert_eq!(v[1], -2.0);
511        assert_eq!(v[2], -1.0);
512        assert_eq!(v[3], -0.0);
513        assert_eq!(v[4], 0.0);
514        assert_eq!(v[5], 1.0);
515        assert_eq!(v[6], 2.0);
516        assert_eq!(v[7], 3.0);
517        assert_eq!(v[8], f64::INFINITY);
518        assert!(v[9].is_nan());
519    }
520
521    #[test]
522    fn test_sorted_float_immutability_and_return_type() {
523        let v = vec64![1.0f32, 2.0, 3.0];
524        let out = sorted_float(&v);
525        assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
526        // Ensure original is unchanged
527        assert_eq!(*v, [1.0, 2.0, 3.0]);
528    }
529
530    #[test]
531    fn test_sorted_float_correct_for_f64() {
532        let v = vec64![3.0f64, 2.0, 1.0];
533        let out = sorted_float(&v);
534        assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
535    }
536
537    #[test]
538    fn test_sort_float_empty_and_single() {
539        let mut v: [f32; 0] = [];
540        sort_float(&mut v);
541        let mut v2 = [42.0f32];
542        sort_float(&mut v2);
543        assert_eq!(v2, [42.0]);
544    }
545
546    #[cfg(test)]
547    mod tests {
548        use super::*;
549        use minarrow::{Vec64, vec64};
550
551        #[test]
552        fn test_sort_int_empty_and_single() {
553            let mut arr: [i32; 0] = [];
554            sort_int(&mut arr);
555            assert_eq!(arr, [] as [i32; 0]);
556            let mut arr2 = vec64![42];
557            sort_int(&mut arr2);
558            assert_eq!(*arr2, [42]);
559        }
560
561        #[test]
562        fn test_sort_int_order() {
563            let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
564            sort_int(&mut arr);
565            assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
566        }
567
568        #[test]
569        fn test_sorted_int_immutability_and_output() {
570            let arr = vec64![5, 3, 7, 1, 2];
571            let sorted = sorted_int(&arr);
572            assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
573            // ensure input not changed
574            assert_eq!(*arr, [5, 3, 7, 1, 2]);
575        }
576
577        #[test]
578        fn test_sort_str_basic() {
579            let mut arr = vec64!["z", "b", "a", "d"];
580            sort_str(&mut arr);
581            assert_eq!(*arr, ["a", "b", "d", "z"]);
582        }
583
584        #[test]
585        fn test_sorted_str_and_non_ascii() {
586            let arr = vec64!["z", "ä", "b", "a", "d"];
587            let sorted = sorted_str(&arr);
588            assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); // note: utf8, ä > z in Rust
589            // input is untouched
590            assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
591        }
592
593        #[test]
594        fn test_sort_string_array_basic() {
595            let offsets = vec![0, 1, 3, 5, 5, 6]; // ["a", "bc", "de", "", "f"]
596            let values = "abcde".to_string() + "f";
597            let (new_offsets, new_values) = sort_string_array(&offsets, &values);
598            // Sorted order: "", "a", "bc", "de", "f"
599            let slices: Vec<_> = new_offsets
600                .windows(2)
601                .map(|w| &new_values[w[0]..w[1]])
602                .collect();
603            assert_eq!(slices, &["", "a", "bc", "de", "f"]);
604        }
605
606        #[test]
607        fn test_sort_categorical_lexical_basic_and_nulls() {
608            // Categories: 0 = "apple", 1 = "banana", 2 = "pear"
609            let unique = Vec64::from_slice_clone(&[
610                "apple".to_string(),
611                "banana".to_string(),
612                "pear".to_string(),
613            ]);
614            let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
615            let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
616            let cat = CategoricalArray {
617                data: data.into(),
618                unique_values: unique,
619                null_mask: Some(mask.clone()),
620            };
621            let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
622
623            // Nulls first
624            let mask_out = sorted_mask.unwrap();
625            let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
626            assert_eq!(null_pos, &[0, 1]);
627
628            // Remaining valid values
629            let sorted_as_str: Vec<_> = sorted_data
630                .iter()
631                .map(|&k| &cat.unique_values[k.to_usize()][..])
632                .collect();
633            let vals = &sorted_as_str[null_pos.len()..];
634            assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
635        }
636
637        #[test]
638        fn test_sort_categorical_all_nulls_and_no_nulls() {
639            // All null
640            let unique = Vec64::from_slice_clone(&["x".to_string()]);
641            let data = Vec64::from_slice(&[0u8, 0, 0]);
642            let mask = Bitmask::from_bools(&[false, false, false]);
643            let cat = CategoricalArray {
644                data: data.clone().into(),
645                unique_values: unique.clone(),
646                null_mask: Some(mask.clone()),
647            };
648            let (_, sorted_mask) = sort_categorical_lexical(&cat);
649            assert_eq!(
650                unpack_bitmask(&sorted_mask.unwrap()),
651                vec64![false, false, false]
652            );
653            // No nulls
654            let mask2 = Bitmask::from_bools(&[true, true, true]);
655            let cat2 = CategoricalArray {
656                data: data.clone().into(),
657                unique_values: unique,
658                null_mask: Some(mask2.clone()),
659            };
660            let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
661            assert_eq!(
662                unpack_bitmask(&sorted_mask2.unwrap()),
663                vec64![true, true, true]
664            );
665        }
666        #[test]
667        fn test_sort_boolean_array_with_nulls() {
668            let mut arr = BooleanArray {
669                data: pack_bitmask(&[false, true, false, true, true, false]),
670                null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
671                len: 6,
672                _phantom: std::marker::PhantomData,
673            };
674            sort_boolean_array(&mut arr);
675            // Nulls first (mask false)
676            let bits = unpack_bitmask(&arr.data);
677            let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
678            assert_eq!(nulls, vec64![false, false, true, true, true, true]);
679            // Sorted data for valid (true): all false first, then true
680            assert_eq!(&bits[2..], [false, false, false, true]);
681        }
682
683        #[test]
684        fn test_sort_boolean_array_all_true_and_all_false() {
685            let mut arr = BooleanArray {
686                data: pack_bitmask(&[true, true, true]),
687                null_mask: None,
688                len: 3,
689                _phantom: std::marker::PhantomData,
690            };
691            sort_boolean_array(&mut arr);
692            assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
693
694            let mut arr2 = BooleanArray {
695                data: pack_bitmask(&[false, false, false]),
696                null_mask: None,
697                len: 3,
698                _phantom: std::marker::PhantomData,
699            };
700            sort_boolean_array(&mut arr2);
701            assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
702        }
703
704        #[test]
705        fn test_sort_boolean_array_all_nulls_and_none() {
706            let mut arr = BooleanArray {
707                data: pack_bitmask(&[true, false, true]),
708                null_mask: Some(pack_bitmask(&[false, false, false])),
709                len: 3,
710                _phantom: std::marker::PhantomData,
711            };
712            sort_boolean_array(&mut arr);
713            assert_eq!(
714                unpack_bitmask(arr.null_mask.as_ref().unwrap()),
715                vec64![false, false, false]
716            );
717        }
718
719        #[test]
720        fn test_sort_slice_with_mask_basic() {
721            let data = vec64![3, 1, 2, 1];
722            let mask = pack_bitmask(&[true, false, true, true]);
723            let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
724            assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
725            assert_eq!(
726                unpack_bitmask(&mask_out.unwrap()),
727                vec64![false, true, true, true]
728            );
729        }
730
731        #[test]
732        fn test_sort_slice_with_mask_no_mask() {
733            let data = vec64![3, 2, 1, 1, 0];
734            let (sorted, mask_out) = sort_slice_with_mask(&data, None);
735            assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
736            assert!(mask_out.is_none());
737        }
738
739        #[test]
740        fn test_sort_slice_with_mask_all_true_and_all_false() {
741            let data = vec64![3, 2, 1, 0];
742            let mask_true = pack_bitmask(&[true; 4]);
743            let mask_false = pack_bitmask(&[false; 4]);
744            let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
745            let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
746            assert_eq!(
747                unpack_bitmask(&mask_true_out.unwrap()),
748                vec64![true, true, true, true]
749            );
750            assert_eq!(
751                unpack_bitmask(&mask_false_out.unwrap()),
752                vec64![false, false, false, false]
753            );
754        }
755
756        #[test]
757        fn test_sort_int_with_duplicates_and_negatives() {
758            let mut arr = vec64![-10, -1, 5, 0, 5, -10];
759            sort_int(&mut arr);
760            assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
761        }
762
763        #[test]
764        fn test_sort_str_empty_and_duplicate() {
765            let mut arr = vec64!["", "a", "b", "a", ""];
766            sort_str(&mut arr);
767            assert_eq!(*arr, ["", "", "a", "a", "b"]);
768        }
769    }
770}