1use std::cmp::Ordering;
11
12use minarrow::{
13 Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64,
14 traits::type_unions::Float,
15};
16use num_traits::{NumCast, Zero};
17
18#[inline]
25pub fn sort_float<T: Float>(slice: &mut [T]) {
26 slice.sort_unstable_by(total_cmp_f);
27}
28#[inline(always)]
33pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
34 if std::mem::size_of::<T>() == 4 {
36 let ua = unsafe { *(a as *const T as *const u32) };
38 let ub = unsafe { *(b as *const T as *const u32) };
39 let ka = if ua & 0x8000_0000 != 0 {
41 !ua
42 } else {
43 ua ^ 0x8000_0000
44 };
45 let kb = if ub & 0x8000_0000 != 0 {
46 !ub
47 } else {
48 ub ^ 0x8000_0000
49 };
50 ka.cmp(&kb)
51 } else if std::mem::size_of::<T>() == 8 {
52 let ua = unsafe { *(a as *const T as *const u64) };
54 let ub = unsafe { *(b as *const T as *const u64) };
55 let ka = if ua & 0x8000_0000_0000_0000 != 0 {
56 !ua
57 } else {
58 ua ^ 0x8000_0000_0000_0000
59 };
60 let kb = if ub & 0x8000_0000_0000_0000 != 0 {
61 !ub
62 } else {
63 ub ^ 0x8000_0000_0000_0000
64 };
65 ka.cmp(&kb)
66 } else {
67 unreachable!("Only f32 and f64 are valid Float types.")
68 }
69}
70
71pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
73 let mut v = Vec64::from_slice(data);
74 sort_float(&mut v);
75 v
76}
77
78#[inline]
98pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
99 slice.sort_unstable();
100}
101
102pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
127 let mut v = Vec64::from_slice(data);
128 sort_int(&mut v);
129 v
130}
131
132#[inline]
149pub fn sort_str(slice: &mut [&str]) {
150 slice.sort_unstable();
151}
152
153pub fn sorted_str(data: &[&str]) -> Vec64<String> {
190 let mut v = Vec64::from_slice(data);
191 sort_str(&mut v);
192 v.iter().map(|s| s.to_string()).collect()
193}
194
195pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
198 let n = offsets.len() - 1;
199 let mut indices: Vec<usize> = (0..n).collect();
200
201 indices.sort_unstable_by(|&i, &j| {
202 let a = &values[offsets[i]..offsets[i + 1]];
203 let b = &values[offsets[j]..offsets[j + 1]];
204 a.cmp(b)
205 });
206
207 let total_bytes: usize = indices
209 .iter()
210 .map(|&idx| offsets[idx + 1] - offsets[idx])
211 .sum();
212
213 let mut new_values = String::with_capacity(total_bytes);
215 let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
216
217 unsafe {
219 new_offsets.set_len(n + 1);
220 }
221 unsafe {
222 new_values.as_mut_vec().set_len(total_bytes);
223 }
224
225 let values_bytes = values.as_bytes();
226 let out_bytes = unsafe { new_values.as_mut_vec() };
227
228 unsafe {
230 *new_offsets.get_unchecked_mut(0) = 0;
231 }
232 let mut current_offset = 0;
233 let mut out_ptr = 0;
234 for (i, &idx) in indices.iter().enumerate() {
235 let start = offsets[idx];
236 let end = offsets[idx + 1];
237 let len = end - start;
238 unsafe {
240 std::ptr::copy_nonoverlapping(
241 values_bytes.as_ptr().add(start),
242 out_bytes.as_mut_ptr().add(out_ptr),
243 len,
244 );
245 current_offset += len;
246 *new_offsets.get_unchecked_mut(i + 1) = current_offset;
247 }
248 out_ptr += len;
249 }
250 unsafe {
252 new_values.as_mut_vec().set_len(current_offset);
253 }
254
255 (new_offsets, new_values)
256}
257
258pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
261 cat: &CategoricalArray<T>,
262) -> (Vec64<T>, Option<Bitmask>) {
263 let len = cat.data.len();
264 let mut indices: Vec<usize> = (0..len).collect();
265
266 indices.sort_by(|&i, &j| {
268 let a_is_null = cat.is_null(i);
269 let b_is_null = cat.is_null(j);
270 match (a_is_null, b_is_null) {
271 (true, true) => Ordering::Equal,
272 (true, false) => Ordering::Less,
273 (false, true) => Ordering::Greater,
274 (false, false) => {
275 let a_key = &cat.unique_values[cat.data[i].to_usize()];
277 let b_key = &cat.unique_values[cat.data[j].to_usize()];
278 a_key.cmp(b_key)
279 }
280 }
281 });
282
283 let mut sorted_data = Vec64::<T>::with_capacity(len);
285 let mut sorted_mask = cat
286 .null_mask
287 .as_ref()
288 .map(|_| Bitmask::new_set_all(len, false));
289
290 for (out_i, &orig_i) in indices.iter().enumerate() {
291 sorted_data.push(cat.data[orig_i]);
292 if let Some(ref mask) = cat.null_mask {
293 if let Some(ref mut sm) = sorted_mask {
294 if unsafe { mask.get_unchecked(orig_i) } {
295 unsafe { sm.set_unchecked(out_i, true) };
296 }
297 }
298 }
299 }
300
301 (sorted_data, sorted_mask)
302}
303
304#[inline]
306fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
307 let mut out = Vec64::with_capacity(mask.len);
308 for i in 0..mask.len {
309 out.push(unsafe { mask.get_unchecked(i) });
310 }
311 out
312}
313
314#[inline]
316fn pack_bitmask(bits: &[bool]) -> Bitmask {
317 let mut mask = Bitmask::new_set_all(bits.len(), false);
318 for (i, &b) in bits.iter().enumerate() {
319 if b {
320 unsafe { mask.set_unchecked(i, true) };
321 }
322 }
323 mask
324}
325
326pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
329 let bits: Vec64<bool> = unpack_bitmask(&arr.data);
330 let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
331
332 let mut indices: Vec<usize> = (0..arr.len).collect();
333
334 indices.sort_unstable_by(|&i, &j| {
336 let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
337 let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
338
339 match (a_null, b_null) {
340 (true, true) => Ordering::Equal, (true, false) => Ordering::Less, (false, true) => Ordering::Greater, (false, false) => {
344 match (bits[i], bits[j]) {
346 (false, false) => Ordering::Equal,
347 (false, true) => Ordering::Less,
348 (true, false) => Ordering::Greater,
349 (true, true) => Ordering::Equal,
350 }
351 }
352 }
353 });
354
355 let sorted_bits: Vec<bool> = indices
357 .iter()
358 .map(|&idx| {
359 let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
360 if is_null { false } else { bits[idx] }
361 })
362 .collect();
363 arr.data = pack_bitmask(&sorted_bits);
364
365 if let Some(null_mask) = arr.null_mask.as_mut() {
367 let sorted_nulls: Vec<bool> = indices
368 .iter()
369 .map(|&idx| nulls.as_ref().unwrap()[idx])
370 .collect();
371 *null_mask = pack_bitmask(&sorted_nulls);
372 }
373}
374
375pub fn sort_slice_with_mask<T: Ord + Copy>(
377 data: &[T],
378 mask: Option<&Bitmask>,
379) -> (Vec64<T>, Option<Bitmask>) {
380 let n = data.len();
381 let mut indices: Vec<usize> = (0..n).collect();
382 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
383
384 let mut sorted_data = Vec64::<T>::with_capacity(n);
385 unsafe {
386 sorted_data.set_len(n);
387 }
388 for (i, &idx) in indices.iter().enumerate() {
389 unsafe {
390 *sorted_data.get_unchecked_mut(i) = data[idx];
391 }
392 }
393
394 let sorted_mask = mask.map(|m| {
395 let mut out = Bitmask::new_set_all(n, false);
396 for (i, &idx) in indices.iter().enumerate() {
397 if unsafe { m.get_unchecked(idx) } {
398 unsafe { out.set_unchecked(i, true) };
399 }
400 }
401 out
402 });
403
404 (sorted_data, sorted_mask)
405}
406
407#[cfg(test)]
408mod tests {
409 use minarrow::vec64;
410
411 use super::*;
412
413 #[test]
414 fn test_total_cmp_f32_ordering_basic() {
415 let a = 1.0f32;
416 let b = 2.0f32;
417 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
418 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
419 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
420 }
421
422 #[test]
423 fn test_total_cmp_f64_ordering_basic() {
424 let a = 1.0f64;
425 let b = 2.0f64;
426 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
427 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
428 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
429 }
430
431 #[test]
432 fn test_total_cmp_zero_and_negzero() {
433 let pz = 0.0f32;
434 let nz = -0.0f32;
435 assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
437 assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
438 }
439
440 #[test]
441 fn test_total_cmp_nan_ordering_f32() {
442 let nan = f32::NAN;
443 let pos_inf = f32::INFINITY;
444 let neg_inf = f32::NEG_INFINITY;
445 let one = 1.0f32;
446
447 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
449 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
450 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
451 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
453 }
454
455 #[test]
456 fn test_total_cmp_nan_ordering_f64() {
457 let nan = f64::NAN;
458 let pos_inf = f64::INFINITY;
459 let neg_inf = f64::NEG_INFINITY;
460 let one = 1.0f64;
461
462 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
463 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
464 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
465 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
466 }
467
468 #[test]
469 fn test_sort_float_f32_all_edge_cases() {
470 let mut v = vec64![
471 3.0f32,
472 -0.0,
473 0.0,
474 f32::INFINITY,
475 f32::NEG_INFINITY,
476 1.0,
477 -1.0,
478 f32::NAN,
479 2.0,
480 -2.0,
481 ];
482 sort_float(&mut v);
483 assert_eq!(v[0], f32::NEG_INFINITY);
486 assert_eq!(v[1], -2.0);
487 assert_eq!(v[2], -1.0);
488 assert_eq!(v[3], -0.0);
489 assert_eq!(v[4], 0.0);
490 assert_eq!(v[5], 1.0);
491 assert_eq!(v[6], 2.0);
492 assert_eq!(v[7], 3.0);
493 assert_eq!(v[8], f32::INFINITY);
494 assert!(v[9].is_nan());
495 }
496
497 #[test]
498 fn test_sort_float_f64_all_edge_cases() {
499 let mut v = vec64![
500 3.0f64,
501 -0.0,
502 0.0,
503 f64::INFINITY,
504 f64::NEG_INFINITY,
505 1.0,
506 -1.0,
507 f64::NAN,
508 2.0,
509 -2.0,
510 ];
511 sort_float(&mut v);
512 assert_eq!(v[0], f64::NEG_INFINITY);
513 assert_eq!(v[1], -2.0);
514 assert_eq!(v[2], -1.0);
515 assert_eq!(v[3], -0.0);
516 assert_eq!(v[4], 0.0);
517 assert_eq!(v[5], 1.0);
518 assert_eq!(v[6], 2.0);
519 assert_eq!(v[7], 3.0);
520 assert_eq!(v[8], f64::INFINITY);
521 assert!(v[9].is_nan());
522 }
523
524 #[test]
525 fn test_sorted_float_immutability_and_return_type() {
526 let v = vec64![1.0f32, 2.0, 3.0];
527 let out = sorted_float(&v);
528 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
529 assert_eq!(*v, [1.0, 2.0, 3.0]);
531 }
532
533 #[test]
534 fn test_sorted_float_correct_for_f64() {
535 let v = vec64![3.0f64, 2.0, 1.0];
536 let out = sorted_float(&v);
537 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
538 }
539
540 #[test]
541 fn test_sort_float_empty_and_single() {
542 let mut v: [f32; 0] = [];
543 sort_float(&mut v);
544 let mut v2 = [42.0f32];
545 sort_float(&mut v2);
546 assert_eq!(v2, [42.0]);
547 }
548
549 #[cfg(test)]
550 mod tests {
551 use super::*;
552 use minarrow::{Vec64, vec64};
553
554 #[test]
555 fn test_sort_int_empty_and_single() {
556 let mut arr: [i32; 0] = [];
557 sort_int(&mut arr);
558 assert_eq!(arr, [] as [i32; 0]);
559 let mut arr2 = vec64![42];
560 sort_int(&mut arr2);
561 assert_eq!(*arr2, [42]);
562 }
563
564 #[test]
565 fn test_sort_int_order() {
566 let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
567 sort_int(&mut arr);
568 assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
569 }
570
571 #[test]
572 fn test_sorted_int_immutability_and_output() {
573 let arr = vec64![5, 3, 7, 1, 2];
574 let sorted = sorted_int(&arr);
575 assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
576 assert_eq!(*arr, [5, 3, 7, 1, 2]);
578 }
579
580 #[test]
581 fn test_sort_str_basic() {
582 let mut arr = vec64!["z", "b", "a", "d"];
583 sort_str(&mut arr);
584 assert_eq!(*arr, ["a", "b", "d", "z"]);
585 }
586
587 #[test]
588 fn test_sorted_str_and_non_ascii() {
589 let arr = vec64!["z", "ä", "b", "a", "d"];
590 let sorted = sorted_str(&arr);
591 assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
594 }
595
596 #[test]
597 fn test_sort_string_array_basic() {
598 let offsets = vec![0, 1, 3, 5, 5, 6]; let values = "abcde".to_string() + "f";
600 let (new_offsets, new_values) = sort_string_array(&offsets, &values);
601 let slices: Vec<_> = new_offsets
603 .windows(2)
604 .map(|w| &new_values[w[0]..w[1]])
605 .collect();
606 assert_eq!(slices, &["", "a", "bc", "de", "f"]);
607 }
608
609 #[test]
610 fn test_sort_categorical_lexical_basic_and_nulls() {
611 let unique = Vec64::from_slice_clone(&[
613 "apple".to_string(),
614 "banana".to_string(),
615 "pear".to_string(),
616 ]);
617 let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
618 let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
619 let cat = CategoricalArray {
620 data: data.into(),
621 unique_values: unique,
622 null_mask: Some(mask.clone()),
623 };
624 let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
625
626 let mask_out = sorted_mask.unwrap();
628 let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
629 assert_eq!(null_pos, &[0, 1]);
630
631 let sorted_as_str: Vec<_> = sorted_data
633 .iter()
634 .map(|&k| &cat.unique_values[k.to_usize()][..])
635 .collect();
636 let vals = &sorted_as_str[null_pos.len()..];
637 assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
638 }
639
640 #[test]
641 fn test_sort_categorical_all_nulls_and_no_nulls() {
642 let unique = Vec64::from_slice_clone(&["x".to_string()]);
644 let data = Vec64::from_slice(&[0u8, 0, 0]);
645 let mask = Bitmask::from_bools(&[false, false, false]);
646 let cat = CategoricalArray {
647 data: data.clone().into(),
648 unique_values: unique.clone(),
649 null_mask: Some(mask.clone()),
650 };
651 let (_, sorted_mask) = sort_categorical_lexical(&cat);
652 assert_eq!(
653 unpack_bitmask(&sorted_mask.unwrap()),
654 vec64![false, false, false]
655 );
656 let mask2 = Bitmask::from_bools(&[true, true, true]);
658 let cat2 = CategoricalArray {
659 data: data.clone().into(),
660 unique_values: unique,
661 null_mask: Some(mask2.clone()),
662 };
663 let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
664 assert_eq!(
665 unpack_bitmask(&sorted_mask2.unwrap()),
666 vec64![true, true, true]
667 );
668 }
669 #[test]
670 fn test_sort_boolean_array_with_nulls() {
671 let mut arr = BooleanArray {
672 data: pack_bitmask(&[false, true, false, true, true, false]),
673 null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
674 len: 6,
675 _phantom: std::marker::PhantomData,
676 };
677 sort_boolean_array(&mut arr);
678 let bits = unpack_bitmask(&arr.data);
680 let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
681 assert_eq!(nulls, vec64![false, false, true, true, true, true]);
682 assert_eq!(&bits[2..], [false, false, false, true]);
684 }
685
686 #[test]
687 fn test_sort_boolean_array_all_true_and_all_false() {
688 let mut arr = BooleanArray {
689 data: pack_bitmask(&[true, true, true]),
690 null_mask: None,
691 len: 3,
692 _phantom: std::marker::PhantomData,
693 };
694 sort_boolean_array(&mut arr);
695 assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
696
697 let mut arr2 = BooleanArray {
698 data: pack_bitmask(&[false, false, false]),
699 null_mask: None,
700 len: 3,
701 _phantom: std::marker::PhantomData,
702 };
703 sort_boolean_array(&mut arr2);
704 assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
705 }
706
707 #[test]
708 fn test_sort_boolean_array_all_nulls_and_none() {
709 let mut arr = BooleanArray {
710 data: pack_bitmask(&[true, false, true]),
711 null_mask: Some(pack_bitmask(&[false, false, false])),
712 len: 3,
713 _phantom: std::marker::PhantomData,
714 };
715 sort_boolean_array(&mut arr);
716 assert_eq!(
717 unpack_bitmask(arr.null_mask.as_ref().unwrap()),
718 vec64![false, false, false]
719 );
720 }
721
722 #[test]
723 fn test_sort_slice_with_mask_basic() {
724 let data = vec64![3, 1, 2, 1];
725 let mask = pack_bitmask(&[true, false, true, true]);
726 let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
727 assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
728 assert_eq!(
729 unpack_bitmask(&mask_out.unwrap()),
730 vec64![false, true, true, true]
731 );
732 }
733
734 #[test]
735 fn test_sort_slice_with_mask_no_mask() {
736 let data = vec64![3, 2, 1, 1, 0];
737 let (sorted, mask_out) = sort_slice_with_mask(&data, None);
738 assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
739 assert!(mask_out.is_none());
740 }
741
742 #[test]
743 fn test_sort_slice_with_mask_all_true_and_all_false() {
744 let data = vec64![3, 2, 1, 0];
745 let mask_true = pack_bitmask(&[true; 4]);
746 let mask_false = pack_bitmask(&[false; 4]);
747 let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
748 let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
749 assert_eq!(
750 unpack_bitmask(&mask_true_out.unwrap()),
751 vec64![true, true, true, true]
752 );
753 assert_eq!(
754 unpack_bitmask(&mask_false_out.unwrap()),
755 vec64![false, false, false, false]
756 );
757 }
758
759 #[test]
760 fn test_sort_int_with_duplicates_and_negatives() {
761 let mut arr = vec64![-10, -1, 5, 0, 5, -10];
762 sort_int(&mut arr);
763 assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
764 }
765
766 #[test]
767 fn test_sort_str_empty_and_duplicate() {
768 let mut arr = vec64!["", "a", "b", "a", ""];
769 sort_str(&mut arr);
770 assert_eq!(*arr, ["", "", "a", "a", "b"]);
771 }
772 }
773}