1use std::cmp::Ordering;
11
12use minarrow::{Bitmask, BooleanArray, CategoricalArray, Integer, MaskedArray, Vec64};
13use num_traits::{Float, NumCast, Zero};
14
15#[inline]
22pub fn sort_float<T: Float>(slice: &mut [T]) {
23 slice.sort_unstable_by(total_cmp_f);
24}
25#[inline(always)]
30pub fn total_cmp_f<T: Float>(a: &T, b: &T) -> Ordering {
31 if std::mem::size_of::<T>() == 4 {
33 let ua = unsafe { *(a as *const T as *const u32) };
35 let ub = unsafe { *(b as *const T as *const u32) };
36 let ka = if ua & 0x8000_0000 != 0 {
38 !ua
39 } else {
40 ua ^ 0x8000_0000
41 };
42 let kb = if ub & 0x8000_0000 != 0 {
43 !ub
44 } else {
45 ub ^ 0x8000_0000
46 };
47 ka.cmp(&kb)
48 } else if std::mem::size_of::<T>() == 8 {
49 let ua = unsafe { *(a as *const T as *const u64) };
51 let ub = unsafe { *(b as *const T as *const u64) };
52 let ka = if ua & 0x8000_0000_0000_0000 != 0 {
53 !ua
54 } else {
55 ua ^ 0x8000_0000_0000_0000
56 };
57 let kb = if ub & 0x8000_0000_0000_0000 != 0 {
58 !ub
59 } else {
60 ub ^ 0x8000_0000_0000_0000
61 };
62 ka.cmp(&kb)
63 } else {
64 unreachable!("Only f32 and f64 are valid Float types.")
65 }
66}
67
68pub fn sorted_float<T: Float>(data: &[T]) -> Vec64<T> {
70 let mut v = Vec64::from_slice(data);
71 sort_float(&mut v);
72 v
73}
74
75#[inline]
95pub fn sort_int<T: Ord + Copy>(slice: &mut [T]) {
96 slice.sort_unstable();
97}
98
99pub fn sorted_int<T: Ord + Copy>(data: &[T]) -> Vec64<T> {
124 let mut v = Vec64::from_slice(data);
125 sort_int(&mut v);
126 v
127}
128
129#[inline]
146pub fn sort_str(slice: &mut [&str]) {
147 slice.sort_unstable();
148}
149
150pub fn sorted_str(data: &[&str]) -> Vec64<String> {
187 let mut v = Vec64::from_slice(data);
188 sort_str(&mut v);
189 v.iter().map(|s| s.to_string()).collect()
190}
191
192pub fn sort_string_array(offsets: &[usize], values: &str) -> (Vec64<usize>, String) {
195 let n = offsets.len() - 1;
196 let mut indices: Vec<usize> = (0..n).collect();
197
198 indices.sort_unstable_by(|&i, &j| {
199 let a = &values[offsets[i]..offsets[i + 1]];
200 let b = &values[offsets[j]..offsets[j + 1]];
201 a.cmp(b)
202 });
203
204 let total_bytes: usize = indices
206 .iter()
207 .map(|&idx| offsets[idx + 1] - offsets[idx])
208 .sum();
209
210 let mut new_values = String::with_capacity(total_bytes);
212 let mut new_offsets = Vec64::<usize>::with_capacity(n + 1);
213
214 unsafe {
216 new_offsets.set_len(n + 1);
217 }
218 unsafe {
219 new_values.as_mut_vec().set_len(total_bytes);
220 }
221
222 let values_bytes = values.as_bytes();
223 let out_bytes = unsafe { new_values.as_mut_vec() };
224
225 unsafe {
227 *new_offsets.get_unchecked_mut(0) = 0;
228 }
229 let mut current_offset = 0;
230 let mut out_ptr = 0;
231 for (i, &idx) in indices.iter().enumerate() {
232 let start = offsets[idx];
233 let end = offsets[idx + 1];
234 let len = end - start;
235 unsafe {
237 std::ptr::copy_nonoverlapping(
238 values_bytes.as_ptr().add(start),
239 out_bytes.as_mut_ptr().add(out_ptr),
240 len,
241 );
242 current_offset += len;
243 *new_offsets.get_unchecked_mut(i + 1) = current_offset;
244 }
245 out_ptr += len;
246 }
247 unsafe {
249 new_values.as_mut_vec().set_len(current_offset);
250 }
251
252 (new_offsets, new_values)
253}
254
255pub fn sort_categorical_lexical<T: Ord + Copy + Zero + NumCast + Integer>(
258 cat: &CategoricalArray<T>,
259) -> (Vec64<T>, Option<Bitmask>) {
260 let len = cat.data.len();
261 let mut indices: Vec<usize> = (0..len).collect();
262
263 indices.sort_by(|&i, &j| {
265 let a_is_null = cat.is_null(i);
266 let b_is_null = cat.is_null(j);
267 match (a_is_null, b_is_null) {
268 (true, true) => Ordering::Equal,
269 (true, false) => Ordering::Less,
270 (false, true) => Ordering::Greater,
271 (false, false) => {
272 let a_key = &cat.unique_values[cat.data[i].to_usize()];
274 let b_key = &cat.unique_values[cat.data[j].to_usize()];
275 a_key.cmp(b_key)
276 }
277 }
278 });
279
280 let mut sorted_data = Vec64::<T>::with_capacity(len);
282 let mut sorted_mask = cat
283 .null_mask
284 .as_ref()
285 .map(|_| Bitmask::new_set_all(len, false));
286
287 for (out_i, &orig_i) in indices.iter().enumerate() {
288 sorted_data.push(cat.data[orig_i]);
289 if let Some(ref mask) = cat.null_mask {
290 if let Some(ref mut sm) = sorted_mask {
291 if unsafe { mask.get_unchecked(orig_i) } {
292 unsafe { sm.set_unchecked(out_i, true) };
293 }
294 }
295 }
296 }
297
298 (sorted_data, sorted_mask)
299}
300
301#[inline]
303fn unpack_bitmask(mask: &Bitmask) -> Vec64<bool> {
304 let mut out = Vec64::with_capacity(mask.len);
305 for i in 0..mask.len {
306 out.push(unsafe { mask.get_unchecked(i) });
307 }
308 out
309}
310
311#[inline]
313fn pack_bitmask(bits: &[bool]) -> Bitmask {
314 let mut mask = Bitmask::new_set_all(bits.len(), false);
315 for (i, &b) in bits.iter().enumerate() {
316 if b {
317 unsafe { mask.set_unchecked(i, true) };
318 }
319 }
320 mask
321}
322
323pub fn sort_boolean_array(arr: &mut BooleanArray<()>) {
326 let bits: Vec64<bool> = unpack_bitmask(&arr.data);
327 let nulls: Option<Vec64<bool>> = arr.null_mask.as_ref().map(unpack_bitmask);
328
329 let mut indices: Vec<usize> = (0..arr.len).collect();
330
331 indices.sort_unstable_by(|&i, &j| {
333 let a_null = !nulls.as_ref().map_or(true, |n| n[i]);
334 let b_null = !nulls.as_ref().map_or(true, |n| n[j]);
335
336 match (a_null, b_null) {
337 (true, true) => Ordering::Equal, (true, false) => Ordering::Less, (false, true) => Ordering::Greater, (false, false) => {
341 match (bits[i], bits[j]) {
343 (false, false) => Ordering::Equal,
344 (false, true) => Ordering::Less,
345 (true, false) => Ordering::Greater,
346 (true, true) => Ordering::Equal,
347 }
348 }
349 }
350 });
351
352 let sorted_bits: Vec<bool> = indices
354 .iter()
355 .map(|&idx| {
356 let is_null = !nulls.as_ref().map_or(true, |n| n[idx]);
357 if is_null { false } else { bits[idx] }
358 })
359 .collect();
360 arr.data = pack_bitmask(&sorted_bits);
361
362 if let Some(null_mask) = arr.null_mask.as_mut() {
364 let sorted_nulls: Vec<bool> = indices
365 .iter()
366 .map(|&idx| nulls.as_ref().unwrap()[idx])
367 .collect();
368 *null_mask = pack_bitmask(&sorted_nulls);
369 }
370}
371
372pub fn sort_slice_with_mask<T: Ord + Copy>(
374 data: &[T],
375 mask: Option<&Bitmask>,
376) -> (Vec64<T>, Option<Bitmask>) {
377 let n = data.len();
378 let mut indices: Vec<usize> = (0..n).collect();
379 indices.sort_unstable_by(|&i, &j| data[i].cmp(&data[j]));
380
381 let mut sorted_data = Vec64::<T>::with_capacity(n);
382 unsafe {
383 sorted_data.set_len(n);
384 }
385 for (i, &idx) in indices.iter().enumerate() {
386 unsafe {
387 *sorted_data.get_unchecked_mut(i) = data[idx];
388 }
389 }
390
391 let sorted_mask = mask.map(|m| {
392 let mut out = Bitmask::new_set_all(n, false);
393 for (i, &idx) in indices.iter().enumerate() {
394 if unsafe { m.get_unchecked(idx) } {
395 unsafe { out.set_unchecked(i, true) };
396 }
397 }
398 out
399 });
400
401 (sorted_data, sorted_mask)
402}
403
404#[cfg(test)]
405mod tests {
406 use minarrow::vec64;
407
408 use super::*;
409
410 #[test]
411 fn test_total_cmp_f32_ordering_basic() {
412 let a = 1.0f32;
413 let b = 2.0f32;
414 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
415 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
416 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
417 }
418
419 #[test]
420 fn test_total_cmp_f64_ordering_basic() {
421 let a = 1.0f64;
422 let b = 2.0f64;
423 assert_eq!(total_cmp_f(&a, &b), std::cmp::Ordering::Less);
424 assert_eq!(total_cmp_f(&b, &a), std::cmp::Ordering::Greater);
425 assert_eq!(total_cmp_f(&a, &a), std::cmp::Ordering::Equal);
426 }
427
428 #[test]
429 fn test_total_cmp_zero_and_negzero() {
430 let pz = 0.0f32;
431 let nz = -0.0f32;
432 assert_ne!(f32::to_bits(pz), f32::to_bits(nz));
434 assert_ne!(total_cmp_f(&pz, &nz), std::cmp::Ordering::Equal);
435 }
436
437 #[test]
438 fn test_total_cmp_nan_ordering_f32() {
439 let nan = f32::NAN;
440 let pos_inf = f32::INFINITY;
441 let neg_inf = f32::NEG_INFINITY;
442 let one = 1.0f32;
443
444 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
446 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
447 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
448 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
450 }
451
452 #[test]
453 fn test_total_cmp_nan_ordering_f64() {
454 let nan = f64::NAN;
455 let pos_inf = f64::INFINITY;
456 let neg_inf = f64::NEG_INFINITY;
457 let one = 1.0f64;
458
459 assert_eq!(total_cmp_f(&nan, &pos_inf), std::cmp::Ordering::Greater);
460 assert_eq!(total_cmp_f(&nan, &neg_inf), std::cmp::Ordering::Greater);
461 assert_eq!(total_cmp_f(&nan, &one), std::cmp::Ordering::Greater);
462 assert_eq!(total_cmp_f(&nan, &nan), std::cmp::Ordering::Equal);
463 }
464
465 #[test]
466 fn test_sort_float_f32_all_edge_cases() {
467 let mut v = vec64![
468 3.0f32,
469 -0.0,
470 0.0,
471 f32::INFINITY,
472 f32::NEG_INFINITY,
473 1.0,
474 -1.0,
475 f32::NAN,
476 2.0,
477 -2.0,
478 ];
479 sort_float(&mut v);
480 assert_eq!(v[0], f32::NEG_INFINITY);
483 assert_eq!(v[1], -2.0);
484 assert_eq!(v[2], -1.0);
485 assert_eq!(v[3], -0.0);
486 assert_eq!(v[4], 0.0);
487 assert_eq!(v[5], 1.0);
488 assert_eq!(v[6], 2.0);
489 assert_eq!(v[7], 3.0);
490 assert_eq!(v[8], f32::INFINITY);
491 assert!(v[9].is_nan());
492 }
493
494 #[test]
495 fn test_sort_float_f64_all_edge_cases() {
496 let mut v = vec64![
497 3.0f64,
498 -0.0,
499 0.0,
500 f64::INFINITY,
501 f64::NEG_INFINITY,
502 1.0,
503 -1.0,
504 f64::NAN,
505 2.0,
506 -2.0,
507 ];
508 sort_float(&mut v);
509 assert_eq!(v[0], f64::NEG_INFINITY);
510 assert_eq!(v[1], -2.0);
511 assert_eq!(v[2], -1.0);
512 assert_eq!(v[3], -0.0);
513 assert_eq!(v[4], 0.0);
514 assert_eq!(v[5], 1.0);
515 assert_eq!(v[6], 2.0);
516 assert_eq!(v[7], 3.0);
517 assert_eq!(v[8], f64::INFINITY);
518 assert!(v[9].is_nan());
519 }
520
521 #[test]
522 fn test_sorted_float_immutability_and_return_type() {
523 let v = vec64![1.0f32, 2.0, 3.0];
524 let out = sorted_float(&v);
525 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
526 assert_eq!(*v, [1.0, 2.0, 3.0]);
528 }
529
530 #[test]
531 fn test_sorted_float_correct_for_f64() {
532 let v = vec64![3.0f64, 2.0, 1.0];
533 let out = sorted_float(&v);
534 assert_eq!(out.as_slice(), &[1.0, 2.0, 3.0]);
535 }
536
537 #[test]
538 fn test_sort_float_empty_and_single() {
539 let mut v: [f32; 0] = [];
540 sort_float(&mut v);
541 let mut v2 = [42.0f32];
542 sort_float(&mut v2);
543 assert_eq!(v2, [42.0]);
544 }
545
546 #[cfg(test)]
547 mod tests {
548 use super::*;
549 use minarrow::{Vec64, vec64};
550
551 #[test]
552 fn test_sort_int_empty_and_single() {
553 let mut arr: [i32; 0] = [];
554 sort_int(&mut arr);
555 assert_eq!(arr, [] as [i32; 0]);
556 let mut arr2 = vec64![42];
557 sort_int(&mut arr2);
558 assert_eq!(*arr2, [42]);
559 }
560
561 #[test]
562 fn test_sort_int_order() {
563 let mut arr = vec64![4, 2, 7, 1, 1, 6, 0, -5];
564 sort_int(&mut arr);
565 assert_eq!(*arr, [-5, 0, 1, 1, 2, 4, 6, 7]);
566 }
567
568 #[test]
569 fn test_sorted_int_immutability_and_output() {
570 let arr = vec64![5, 3, 7, 1, 2];
571 let sorted = sorted_int(&arr);
572 assert_eq!(sorted.as_slice(), &[1, 2, 3, 5, 7]);
573 assert_eq!(*arr, [5, 3, 7, 1, 2]);
575 }
576
577 #[test]
578 fn test_sort_str_basic() {
579 let mut arr = vec64!["z", "b", "a", "d"];
580 sort_str(&mut arr);
581 assert_eq!(*arr, ["a", "b", "d", "z"]);
582 }
583
584 #[test]
585 fn test_sorted_str_and_non_ascii() {
586 let arr = vec64!["z", "ä", "b", "a", "d"];
587 let sorted = sorted_str(&arr);
588 assert_eq!(sorted.as_slice(), &["a", "b", "d", "z", "ä"]); assert_eq!(*arr, ["z", "ä", "b", "a", "d"]);
591 }
592
593 #[test]
594 fn test_sort_string_array_basic() {
595 let offsets = vec![0, 1, 3, 5, 5, 6]; let values = "abcde".to_string() + "f";
597 let (new_offsets, new_values) = sort_string_array(&offsets, &values);
598 let slices: Vec<_> = new_offsets
600 .windows(2)
601 .map(|w| &new_values[w[0]..w[1]])
602 .collect();
603 assert_eq!(slices, &["", "a", "bc", "de", "f"]);
604 }
605
606 #[test]
607 fn test_sort_categorical_lexical_basic_and_nulls() {
608 let unique = Vec64::from_slice_clone(&[
610 "apple".to_string(),
611 "banana".to_string(),
612 "pear".to_string(),
613 ]);
614 let data = Vec64::from_slice(&[2u8, 0, 1, 1, 2, 0, 1]);
615 let mask = Bitmask::from_bools(&[true, false, true, true, true, false, true]);
616 let cat = CategoricalArray {
617 data: data.into(),
618 unique_values: unique,
619 null_mask: Some(mask.clone()),
620 };
621 let (sorted_data, sorted_mask) = sort_categorical_lexical(&cat);
622
623 let mask_out = sorted_mask.unwrap();
625 let null_pos: Vec<_> = (0..mask_out.len()).filter(|&i| !mask_out.get(i)).collect();
626 assert_eq!(null_pos, &[0, 1]);
627
628 let sorted_as_str: Vec<_> = sorted_data
630 .iter()
631 .map(|&k| &cat.unique_values[k.to_usize()][..])
632 .collect();
633 let vals = &sorted_as_str[null_pos.len()..];
634 assert_eq!(vals, &["banana", "banana", "banana", "pear", "pear"]);
635 }
636
637 #[test]
638 fn test_sort_categorical_all_nulls_and_no_nulls() {
639 let unique = Vec64::from_slice_clone(&["x".to_string()]);
641 let data = Vec64::from_slice(&[0u8, 0, 0]);
642 let mask = Bitmask::from_bools(&[false, false, false]);
643 let cat = CategoricalArray {
644 data: data.clone().into(),
645 unique_values: unique.clone(),
646 null_mask: Some(mask.clone()),
647 };
648 let (_, sorted_mask) = sort_categorical_lexical(&cat);
649 assert_eq!(
650 unpack_bitmask(&sorted_mask.unwrap()),
651 vec64![false, false, false]
652 );
653 let mask2 = Bitmask::from_bools(&[true, true, true]);
655 let cat2 = CategoricalArray {
656 data: data.clone().into(),
657 unique_values: unique,
658 null_mask: Some(mask2.clone()),
659 };
660 let (_, sorted_mask2) = sort_categorical_lexical(&cat2);
661 assert_eq!(
662 unpack_bitmask(&sorted_mask2.unwrap()),
663 vec64![true, true, true]
664 );
665 }
666 #[test]
667 fn test_sort_boolean_array_with_nulls() {
668 let mut arr = BooleanArray {
669 data: pack_bitmask(&[false, true, false, true, true, false]),
670 null_mask: Some(pack_bitmask(&[true, false, true, true, false, true])),
671 len: 6,
672 _phantom: std::marker::PhantomData,
673 };
674 sort_boolean_array(&mut arr);
675 let bits = unpack_bitmask(&arr.data);
677 let nulls = unpack_bitmask(arr.null_mask.as_ref().unwrap());
678 assert_eq!(nulls, vec64![false, false, true, true, true, true]);
679 assert_eq!(&bits[2..], [false, false, false, true]);
681 }
682
683 #[test]
684 fn test_sort_boolean_array_all_true_and_all_false() {
685 let mut arr = BooleanArray {
686 data: pack_bitmask(&[true, true, true]),
687 null_mask: None,
688 len: 3,
689 _phantom: std::marker::PhantomData,
690 };
691 sort_boolean_array(&mut arr);
692 assert_eq!(unpack_bitmask(&arr.data), vec64![true, true, true]);
693
694 let mut arr2 = BooleanArray {
695 data: pack_bitmask(&[false, false, false]),
696 null_mask: None,
697 len: 3,
698 _phantom: std::marker::PhantomData,
699 };
700 sort_boolean_array(&mut arr2);
701 assert_eq!(unpack_bitmask(&arr2.data), vec64![false, false, false]);
702 }
703
704 #[test]
705 fn test_sort_boolean_array_all_nulls_and_none() {
706 let mut arr = BooleanArray {
707 data: pack_bitmask(&[true, false, true]),
708 null_mask: Some(pack_bitmask(&[false, false, false])),
709 len: 3,
710 _phantom: std::marker::PhantomData,
711 };
712 sort_boolean_array(&mut arr);
713 assert_eq!(
714 unpack_bitmask(arr.null_mask.as_ref().unwrap()),
715 vec64![false, false, false]
716 );
717 }
718
719 #[test]
720 fn test_sort_slice_with_mask_basic() {
721 let data = vec64![3, 1, 2, 1];
722 let mask = pack_bitmask(&[true, false, true, true]);
723 let (sorted, mask_out) = sort_slice_with_mask(&data, Some(&mask));
724 assert_eq!(sorted.as_slice(), &[1, 1, 2, 3]);
725 assert_eq!(
726 unpack_bitmask(&mask_out.unwrap()),
727 vec64![false, true, true, true]
728 );
729 }
730
731 #[test]
732 fn test_sort_slice_with_mask_no_mask() {
733 let data = vec64![3, 2, 1, 1, 0];
734 let (sorted, mask_out) = sort_slice_with_mask(&data, None);
735 assert_eq!(sorted.as_slice(), &[0, 1, 1, 2, 3]);
736 assert!(mask_out.is_none());
737 }
738
739 #[test]
740 fn test_sort_slice_with_mask_all_true_and_all_false() {
741 let data = vec64![3, 2, 1, 0];
742 let mask_true = pack_bitmask(&[true; 4]);
743 let mask_false = pack_bitmask(&[false; 4]);
744 let (_, mask_true_out) = sort_slice_with_mask(&data, Some(&mask_true));
745 let (_, mask_false_out) = sort_slice_with_mask(&data, Some(&mask_false));
746 assert_eq!(
747 unpack_bitmask(&mask_true_out.unwrap()),
748 vec64![true, true, true, true]
749 );
750 assert_eq!(
751 unpack_bitmask(&mask_false_out.unwrap()),
752 vec64![false, false, false, false]
753 );
754 }
755
756 #[test]
757 fn test_sort_int_with_duplicates_and_negatives() {
758 let mut arr = vec64![-10, -1, 5, 0, 5, -10];
759 sort_int(&mut arr);
760 assert_eq!(*arr, [-10, -10, -1, 0, 5, 5]);
761 }
762
763 #[test]
764 fn test_sort_str_empty_and_duplicate() {
765 let mut arr = vec64!["", "a", "b", "a", ""];
766 sort_str(&mut arr);
767 assert_eq!(*arr, ["", "", "a", "a", "b"]);
768 }
769 }
770}