use crate::sort::bam_fields;
use crate::sort::keys::{RawSortKey, SortContext};
use std::cmp::Ordering;
use std::io::{Read, Write};

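/// A coordinate sort key packed into a single `u64` so records compare with
/// one integer comparison. Layout, read off the shifts in `new` below:
///
/// ```text
/// bit  0       reverse-strand flag
/// bits 1..=32  pos + 1 (pos = -1, "no coordinate", packs to 0)
/// bits 34..    tid (unplaced reads use nref, sorting after all references)
/// ```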
#[repr(transparent)]
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub struct PackedCoordKey(pub u64);

impl PackedCoordKey {
    #[inline]
    #[must_use]
    #[allow(clippy::cast_sign_loss)]
    pub fn new(tid: i32, pos: i32, reverse: bool, nref: u32) -> Self {
        // Unplaced reads (tid < 0) take nref so they sort after all real references.
        let tid = if tid < 0 { nref } else { tid as u32 };
        // wrapping_add(1) maps pos = -1 ("no coordinate") to 0 and keeps
        // nonnegative positions in order.
        #[allow(clippy::cast_lossless)]
        let key = (u64::from(tid) << 34)
            | ((i64::from(pos) as u64).wrapping_add(1) << 1)
            | u64::from(reverse);
        Self(key)
    }

    #[inline]
    #[must_use]
    pub fn unmapped() -> Self {
        Self(u64::MAX)
    }
}

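/// A 24-byte, `repr(C)` handle into a [`RecordBuffer`]: the precomputed sort
/// key plus the record's offset and length in the data arena. Equality and
/// ordering consider only `sort_key`, so sorting the refs never touches the
/// record bytes.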
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct RecordRef {
    pub sort_key: u64,
    pub offset: u64,
    pub len: u32,
    padding: u32,
}

impl PartialEq for RecordRef {
    fn eq(&self, other: &Self) -> bool {
        self.sort_key == other.sort_key
    }
}

impl Eq for RecordRef {}

impl PartialOrd for RecordRef {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for RecordRef {
    fn cmp(&self, other: &Self) -> Ordering {
        self.sort_key.cmp(&other.sort_key)
    }
}

#[repr(C)]
#[derive(Copy, Clone, Debug)]
struct InlineHeader {
    sort_key: u64,
    record_len: u32,
    padding: u32,
}

const HEADER_SIZE: usize = std::mem::size_of::<InlineHeader>();

pub struct RecordBuffer {
    data: Vec<u8>,
    refs: Vec<RecordRef>,
    nref: u32,
}

impl RecordBuffer {
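    /// Creates a buffer sized for roughly `estimated_records` records
    /// totalling `estimated_bytes` of record data; inline-header overhead is
    /// added on top.
    ///
    /// A minimal usage sketch (illustrative only; `records` and `out` are
    /// assumed stand-ins for real input and output):
    ///
    /// ```ignore
    /// let mut buf = RecordBuffer::with_capacity(1_000, 1 << 20, nref);
    /// for rec in &records {
    ///     buf.push_coordinate(rec);
    /// }
    /// buf.sort();
    /// for rec in buf.iter_sorted() {
    ///     out.write_all(rec)?;
    /// }
    /// ```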
    #[must_use]
    pub fn with_capacity(estimated_records: usize, estimated_bytes: usize, nref: u32) -> Self {
        Self {
            data: Vec::with_capacity(estimated_bytes + estimated_records * HEADER_SIZE),
            refs: Vec::with_capacity(estimated_records),
            nref,
        }
    }

    #[inline]
    pub fn push_coordinate(&mut self, record: &[u8]) {
        let offset = self.data.len() as u64;
        let len = u32::try_from(record.len()).expect("record length exceeds u32::MAX");

        let sort_key = extract_coordinate_key_inline(record, self.nref);

        let header = InlineHeader { sort_key, record_len: len, padding: 0 };

        // Layout in `data`: [InlineHeader][record bytes]. The header is written
        // field by field as little-endian so the layout is stable across platforms.
        self.data.extend_from_slice(&header.sort_key.to_le_bytes());
        self.data.extend_from_slice(&header.record_len.to_le_bytes());
        self.data.extend_from_slice(&header.padding.to_le_bytes());

        self.data.extend_from_slice(record);

        self.refs.push(RecordRef { sort_key, offset, len, padding: 0 });
    }

    pub fn sort(&mut self) {
        radix_sort_record_refs(&mut self.refs);
    }

    pub fn par_sort(&mut self) {
        parallel_radix_sort_record_refs(&mut self.refs);
    }

    #[inline]
    #[must_use]
    #[allow(clippy::cast_possible_truncation)]
    pub fn get_record(&self, r: &RecordRef) -> &[u8] {
        let start = r.offset as usize + HEADER_SIZE;
        let end = start + r.len as usize;
        &self.data[start..end]
    }

    pub fn iter_sorted(&self) -> impl Iterator<Item = &[u8]> {
        self.refs.iter().map(|r| self.get_record(r))
    }

    #[must_use]
    pub fn refs(&self) -> &[RecordRef] {
        &self.refs
    }

    #[must_use]
    pub fn memory_usage(&self) -> usize {
        self.data.len() + self.refs.len() * std::mem::size_of::<RecordRef>()
    }

    #[must_use]
    pub fn len(&self) -> usize {
        self.refs.len()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.refs.is_empty()
    }

    pub fn clear(&mut self) {
        self.data.clear();
        self.refs.clear();
    }

    #[must_use]
    pub fn nref(&self) -> u32 {
        self.nref
    }
}

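/// Extracts the packed coordinate key directly from raw BAM record bytes via
/// `bam_fields`, avoiding a full record parse. Unmapped records (`tid < 0`)
/// get the `u64::MAX` sentinel so they sort after every mapped read.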
#[inline]
#[must_use]
pub fn extract_coordinate_key_inline(bam: &[u8], nref: u32) -> u64 {
    let tid = bam_fields::ref_id(bam);
    let pos = bam_fields::pos(bam);
    let reverse = (bam_fields::flags(bam) & bam_fields::flags::REVERSE) != 0;

    if tid < 0 {
        PackedCoordKey::unmapped().0
    } else {
        PackedCoordKey::new(tid, pos, reverse, nref).0
    }
}

#[repr(C)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, bytemuck::Pod, bytemuck::Zeroable)]
pub struct TemplateKey {
    pub primary: u64,
    pub secondary: u64,
    pub tertiary: u64,
    pub name_hash_upper: u64,
}

impl TemplateKey {
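    /// Packs template coordinates into four `u64` words that compare in the
    /// intended order via plain integer comparison. Layout, read off the
    /// shifts below (positions are XOR-biased so signed order is preserved):
    ///
    /// ```text
    /// primary          tid1 << 48 | tid2 << 32 | biased(pos1)
    /// secondary        biased(pos2) << 32 | !neg1 << 1 | !neg2
    /// tertiary         library << 48 | (mi.0, 48 bits) << 1 | !mi.1
    /// name_hash_upper  name_hash << 1 | is_upper
    /// ```
    ///
    /// Note: bit 48 of `tertiary` is shared between the shifted `mi.0` and the
    /// low library bit whenever `mi.0` uses all 48 masked bits.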
    #[allow(clippy::too_many_arguments, clippy::cast_sign_loss)]
    #[must_use]
    pub fn new(
        tid1: i32,
        pos1: i32,
        neg1: bool,
        tid2: i32,
        pos2: i32,
        neg2: bool,
        library: u32,
        mi: (u64, bool),
        name_hash: u64,
        is_upper: bool,
    ) -> Self {
        let tid1_packed = if tid1 == i32::MAX { 0xFFFF_u64 } else { (tid1.max(0) as u64) & 0xFFFF };
        let tid2_packed = if tid2 == i32::MAX { 0xFFFF_u64 } else { (tid2.max(0) as u64) & 0xFFFF };
        // XOR with the sign bit maps i32 to u32 while preserving signed order.
        let pos1_packed = u64::from((pos1 as u32) ^ 0x8000_0000) & 0xFFFF_FFFF;
        let pos2_packed = u64::from((pos2 as u32) ^ 0x8000_0000) & 0xFFFF_FFFF;

        let p1 = (tid1_packed << 48) | (tid2_packed << 32) | pos1_packed;

        let p2 = (pos2_packed << 32) | (u64::from(!neg1) << 1) | u64::from(!neg2);

        let p3 = ((u64::from(library) & 0xFFFF) << 48)
            | ((mi.0 & 0xFFFF_FFFF_FFFF) << 1)
            | u64::from(!mi.1);

        let p4 = (name_hash << 1) | u64::from(is_upper);

        Self { primary: p1, secondary: p2, tertiary: p3, name_hash_upper: p4 }
    }

    #[must_use]
    pub fn unmapped(name_hash: u64, is_read2: bool) -> Self {
        Self {
            primary: u64::MAX,
            secondary: u64::MAX,
            tertiary: 0,
            name_hash_upper: (name_hash << 1) | u64::from(is_read2),
        }
    }

    #[inline]
    #[must_use]
    pub fn zeroed() -> Self {
        Self { primary: 0, secondary: 0, tertiary: 0, name_hash_upper: 0 }
    }
}

impl Default for TemplateKey {
    fn default() -> Self {
        Self::zeroed()
    }
}

impl TemplateKey {
    #[inline]
    #[must_use]
    pub fn to_bytes(&self) -> [u8; 32] {
        let mut buf = [0u8; 32];
        buf[0..8].copy_from_slice(&self.primary.to_le_bytes());
        buf[8..16].copy_from_slice(&self.secondary.to_le_bytes());
        buf[16..24].copy_from_slice(&self.tertiary.to_le_bytes());
        buf[24..32].copy_from_slice(&self.name_hash_upper.to_le_bytes());
        buf
    }

    #[inline]
    #[must_use]
    pub fn from_bytes(buf: &[u8; 32]) -> Self {
        Self {
            primary: u64::from_le_bytes([
                buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
            ]),
            secondary: u64::from_le_bytes([
                buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
            ]),
            tertiary: u64::from_le_bytes([
                buf[16], buf[17], buf[18], buf[19], buf[20], buf[21], buf[22], buf[23],
            ]),
            name_hash_upper: u64::from_le_bytes([
                buf[24], buf[25], buf[26], buf[27], buf[28], buf[29], buf[30], buf[31],
            ]),
        }
    }
}

impl PartialOrd for TemplateKey {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TemplateKey {
    fn cmp(&self, other: &Self) -> Ordering {
        self.primary
            .cmp(&other.primary)
            .then_with(|| self.secondary.cmp(&other.secondary))
            .then_with(|| self.tertiary.cmp(&other.tertiary))
            .then_with(|| self.name_hash_upper.cmp(&other.name_hash_upper))
    }
}

impl TemplateKey {
    /// Compares only the positional fields, ignoring `name_hash_upper`.
    #[inline]
    #[must_use]
    pub fn core_cmp(&self, other: &Self) -> Ordering {
        self.primary
            .cmp(&other.primary)
            .then_with(|| self.secondary.cmp(&other.secondary))
            .then_with(|| self.tertiary.cmp(&other.tertiary))
    }
}

impl RawSortKey for TemplateKey {
    const SERIALIZED_SIZE: Option<usize> = Some(32);

    fn extract(_bam: &[u8], _ctx: &SortContext) -> Self {
        unreachable!(
            "TemplateKey::extract() should not be called directly. \
             Use extract_template_key_inline() with LibraryLookup instead."
        )
    }

    #[inline]
    fn write_to<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
        writer.write_all(&self.to_bytes())
    }

    #[inline]
    fn read_from<R: Read>(reader: &mut R) -> std::io::Result<Self> {
        let mut buf = [0u8; 32];
        reader.read_exact(&mut buf)?;
        Ok(Self::from_bytes(&buf))
    }
}

#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct TemplateInlineHeader {
    pub key: TemplateKey,
    pub record_len: u32,
    pub padding: u32,
}

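/// On-disk size of [`TemplateInlineHeader`] as serialized by `write_to`
/// (a sketch of the layout, matching the writes below):
///
/// ```text
/// bytes  0..32  TemplateKey (4 x u64, little-endian)
/// bytes 32..36  record_len (u32, little-endian)
/// bytes 36..40  padding (u32, zero)
/// ```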
pub const TEMPLATE_HEADER_SIZE: usize = 40;

impl TemplateInlineHeader {
    #[inline]
    pub fn write_to(&self, buf: &mut Vec<u8>) {
        buf.extend_from_slice(&self.key.primary.to_le_bytes());
        buf.extend_from_slice(&self.key.secondary.to_le_bytes());
        buf.extend_from_slice(&self.key.tertiary.to_le_bytes());
        buf.extend_from_slice(&self.key.name_hash_upper.to_le_bytes());
        buf.extend_from_slice(&self.record_len.to_le_bytes());
        buf.extend_from_slice(&self.padding.to_le_bytes());
    }

    #[inline]
    #[must_use]
    pub fn read_from(data: &[u8]) -> Self {
        let primary = u64::from_le_bytes([
            data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
        ]);
        let secondary = u64::from_le_bytes([
            data[8], data[9], data[10], data[11], data[12], data[13], data[14], data[15],
        ]);
        let tertiary = u64::from_le_bytes([
            data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
        ]);
        let name_hash_upper = u64::from_le_bytes([
            data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31],
        ]);
        let record_len = u32::from_le_bytes([data[32], data[33], data[34], data[35]]);

        Self {
            key: TemplateKey { primary, secondary, tertiary, name_hash_upper },
            record_len,
            padding: 0,
        }
    }
}

#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct TemplateRecordRef {
    pub key: TemplateKey,
    pub offset: u64,
    pub len: u32,
    pub padding: u32,
}

impl PartialEq for TemplateRecordRef {
    fn eq(&self, other: &Self) -> bool {
        self.offset == other.offset
    }
}

impl Eq for TemplateRecordRef {}

impl PartialOrd for TemplateRecordRef {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TemplateRecordRef {
    fn cmp(&self, other: &Self) -> Ordering {
        // Refs compare by buffer offset (insertion order); the radix sorts
        // below order by `key` directly.
        self.offset.cmp(&other.offset)
    }
}

pub struct TemplateRecordBuffer {
    data: Vec<u8>,
    refs: Vec<TemplateRecordRef>,
}

impl TemplateRecordBuffer {
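    /// Creates a buffer sized for roughly `estimated_records` records
    /// totalling `estimated_bytes` of record data, plus inline-header overhead.
    ///
    /// A minimal usage sketch (illustrative only; key construction is assumed
    /// to happen elsewhere, e.g. via `extract_template_key_inline`):
    ///
    /// ```ignore
    /// let mut buf = TemplateRecordBuffer::with_capacity(1_000, 1 << 20);
    /// for (rec, key) in records {
    ///     buf.push(rec, key);
    /// }
    /// buf.par_sort();
    /// for (key, rec) in buf.iter_sorted_keyed() {
    ///     // consume records in template-key order
    /// }
    /// ```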
    #[must_use]
    pub fn with_capacity(estimated_records: usize, estimated_bytes: usize) -> Self {
        let header_bytes = estimated_records * TEMPLATE_HEADER_SIZE;
        Self {
            data: Vec::with_capacity(estimated_bytes + header_bytes),
            refs: Vec::with_capacity(estimated_records),
        }
    }

    #[inline]
    pub fn push(&mut self, record: &[u8], key: TemplateKey) {
        let offset = self.data.len() as u64;
        let record_len = u32::try_from(record.len()).expect("record length exceeds u32::MAX");

        let header = TemplateInlineHeader { key, record_len, padding: 0 };
        header.write_to(&mut self.data);

        self.data.extend_from_slice(record);

        self.refs.push(TemplateRecordRef { key, offset, len: record_len, padding: 0 });
    }

    pub fn sort(&mut self) {
        radix_sort_template_refs(&mut self.refs);
    }

    pub fn par_sort(&mut self) {
        parallel_radix_sort_template_refs(&mut self.refs);
    }

    #[inline]
    #[must_use]
    #[allow(clippy::cast_possible_truncation)]
    pub fn get_record(&self, r: &TemplateRecordRef) -> &[u8] {
        let start = r.offset as usize + TEMPLATE_HEADER_SIZE;
        let end = start + r.len as usize;
        &self.data[start..end]
    }

    pub fn iter_sorted(&self) -> impl Iterator<Item = &[u8]> {
        self.refs.iter().map(|r| self.get_record(r))
    }

    #[must_use]
    pub fn refs(&self) -> &[TemplateRecordRef] {
        &self.refs
    }

    #[must_use]
    pub fn memory_usage(&self) -> usize {
        self.data.len() + self.refs.len() * std::mem::size_of::<TemplateRecordRef>()
    }

    #[must_use]
    pub fn len(&self) -> usize {
        self.refs.len()
    }

    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.refs.is_empty()
    }

    #[inline]
    #[must_use]
    pub fn get_key(&self, r: &TemplateRecordRef) -> TemplateKey {
        r.key
    }

    pub fn iter_sorted_keyed(&self) -> impl Iterator<Item = (TemplateKey, &[u8])> {
        self.refs.iter().map(|r| (self.get_key(r), self.get_record(r)))
    }

    pub fn clear(&mut self) {
        self.data.clear();
        self.refs.clear();
    }

    #[must_use]
    pub fn data(&self) -> &[u8] {
        &self.data
    }
}

/// Inputs below this length fall back to insertion sort instead of radix passes.
const RADIX_THRESHOLD: usize = 256;

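/// Stable LSD (least-significant-byte-first) radix sort over `sort_key`.
///
/// Each pass is a counting sort on one byte, ping-ponging between `refs` and
/// an auxiliary buffer; only the bytes actually populated by the maximum key
/// are visited. Counting sort is stable, so equal keys keep insertion order.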
#[allow(clippy::uninit_vec, unsafe_code)]
pub fn radix_sort_record_refs(refs: &mut [RecordRef]) {
    let n = refs.len();
    if n < RADIX_THRESHOLD {
        insertion_sort_refs(refs);
        return;
    }

    let max_key = refs.iter().map(|r| r.sort_key).max().unwrap_or(0);
    let bytes_needed =
        if max_key == 0 { 0 } else { ((64 - max_key.leading_zeros()) as usize).div_ceil(8) };

    if bytes_needed == 0 {
        // All keys are zero: already sorted.
        return;
    }

    // SAFETY: every element of `aux` is written before it is read in the
    // first scatter pass below.
    let mut aux: Vec<RecordRef> = Vec::with_capacity(n);
    unsafe {
        aux.set_len(n);
    }

    let mut src = refs as *mut [RecordRef];
    let mut dst = aux.as_mut_slice() as *mut [RecordRef];

    for byte_idx in 0..bytes_needed {
        let src_slice = unsafe { &*src };
        let dst_slice = unsafe { &mut *dst };

        // Histogram of the current byte.
        let mut counts = [0usize; 256];
        for r in src_slice {
            let byte = ((r.sort_key >> (byte_idx * 8)) & 0xFF) as usize;
            counts[byte] += 1;
        }

        // Exclusive prefix sum: counts[b] becomes the first output index for byte b.
        let mut total = 0;
        for count in &mut counts {
            let c = *count;
            *count = total;
            total += c;
        }

        // Stable scatter into the destination buffer.
        for r in src_slice {
            let byte = ((r.sort_key >> (byte_idx * 8)) & 0xFF) as usize;
            let dest_idx = counts[byte];
            counts[byte] += 1;
            dst_slice[dest_idx] = *r;
        }

        std::mem::swap(&mut src, &mut dst);
    }

    // After an odd number of passes the sorted data lives in `aux`; copy it back.
    if bytes_needed % 2 == 1 {
        let src_slice = unsafe { &*src };
        refs.copy_from_slice(src_slice);
    }
}

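/// Parallel variant: sorts equal-size chunks on the rayon pool, then performs
/// a stable k-way merge of the sorted chunks.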
pub fn parallel_radix_sort_record_refs(refs: &mut [RecordRef]) {
    use rayon::prelude::*;

    let n = refs.len();
    if n < RADIX_THRESHOLD * 2 {
        radix_sort_record_refs(refs);
        return;
    }

    let n_threads = rayon::current_num_threads();

    if n_threads > 1 && n > 10_000 {
        let chunk_size = n.div_ceil(n_threads);

        // Sort each chunk independently in parallel.
        refs.par_chunks_mut(chunk_size).for_each(|chunk| {
            radix_sort_record_refs(chunk);
        });

        // Reconstruct the chunk boundaries as index ranges for the merge.
        let chunk_boundaries: Vec<_> = refs
            .chunks(chunk_size)
            .scan(0, |pos, chunk| {
                let start = *pos;
                *pos += chunk.len();
                Some(start..*pos)
            })
            .collect();

        merge_sorted_chunks(refs, &chunk_boundaries);
    } else {
        radix_sort_record_refs(refs);
    }
}

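/// Stable k-way merge of consecutive sorted ranges of `refs`, using a binary
/// min-heap keyed on `(sort_key, chunk_idx)`; breaking ties by chunk index
/// keeps equal keys in their original (insertion) order.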
fn merge_sorted_chunks(refs: &mut [RecordRef], chunk_ranges: &[std::ops::Range<usize>]) {
    use crate::sort::radix::{heap_make, heap_sift_down};

    struct HeapEntry {
        key: u64,
        chunk_idx: usize,
        pos: usize,
    }

    if chunk_ranges.len() <= 1 {
        return;
    }

    let n = refs.len();
    let mut result: Vec<RecordRef> = Vec::with_capacity(n);

    // Seed the heap with the head of every non-empty chunk.
    let mut heap: Vec<HeapEntry> = Vec::with_capacity(chunk_ranges.len());
    for (chunk_idx, range) in chunk_ranges.iter().enumerate() {
        if !range.is_empty() {
            heap.push(HeapEntry { key: refs[range.start].sort_key, chunk_idx, pos: range.start });
        }
    }

    if heap.is_empty() {
        return;
    }

    // Inverted comparator: "greater" entries sink, giving a min-heap so the
    // smallest (key, chunk_idx) pair is always at the root.
    let lt = |a: &HeapEntry, b: &HeapEntry| -> bool { (a.key, a.chunk_idx) > (b.key, b.chunk_idx) };

    heap_make(&mut heap, lt);
    let mut heap_size = heap.len();

    while heap_size > 0 {
        let entry = &heap[0];
        result.push(refs[entry.pos]);

        let chunk_idx = entry.chunk_idx;
        let next_pos = entry.pos + 1;
        let range = &chunk_ranges[chunk_idx];

        if next_pos < range.end {
            // Replace the root with the next element from the same chunk.
            heap[0] = HeapEntry { key: refs[next_pos].sort_key, chunk_idx, pos: next_pos };
            heap_sift_down(&mut heap, 0, heap_size, lt);
        } else {
            // Chunk exhausted: shrink the heap.
            heap_size -= 1;
            if heap_size > 0 {
                heap.swap(0, heap_size);
                heap_sift_down(&mut heap, 0, heap_size, lt);
            }
        }
    }

    refs.copy_from_slice(&result);
}

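/// Stable binary insertion sort: `partition_point` with `<=` places each
/// element after any equal keys, preserving insertion order.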
fn insertion_sort_refs(refs: &mut [RecordRef]) {
    for i in 1..refs.len() {
        let key = refs[i].sort_key;
        let insert_pos = refs[..i].partition_point(|r| r.sort_key <= key);
        if insert_pos < i {
            refs[insert_pos..=i].rotate_right(1);
        }
    }
}

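/// Stable LSD radix sort over the four `TemplateKey` words: fields are sorted
/// least-significant first (`name_hash_upper` up to `primary`), so each later
/// pass refines earlier ones without disturbing their order.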
#[allow(clippy::uninit_vec, unsafe_code)]
pub fn radix_sort_template_refs(refs: &mut [TemplateRecordRef]) {
    let n = refs.len();
    if n < RADIX_THRESHOLD {
        insertion_sort_template_refs(refs);
        return;
    }

    // SAFETY: every element of `aux` is written before it is read in the
    // first scatter pass of `radix_sort_template_field`.
    let mut aux: Vec<TemplateRecordRef> = Vec::with_capacity(n);
    unsafe {
        aux.set_len(n);
    }

    // Least-significant field first; later passes dominate earlier ones.
    let fields: [fn(&TemplateRecordRef) -> u64; 4] = [
        |r: &TemplateRecordRef| r.key.name_hash_upper,
        |r: &TemplateRecordRef| r.key.tertiary,
        |r: &TemplateRecordRef| r.key.secondary,
        |r: &TemplateRecordRef| r.key.primary,
    ];

    for (field_idx, get_field) in fields.iter().enumerate() {
        let max_val = refs.iter().map(get_field).max().unwrap_or(0);
        let bytes_needed =
            if max_val == 0 { 0 } else { ((64 - max_val.leading_zeros()) as usize).div_ceil(8) };

        if bytes_needed == 0 {
            // This field is zero everywhere: nothing to reorder.
            continue;
        }

        radix_sort_template_field(refs, &mut aux, get_field, bytes_needed, field_idx);
    }
}

#[allow(clippy::uninit_vec, unsafe_code)]
fn radix_sort_template_field<F>(
    refs: &mut [TemplateRecordRef],
    aux: &mut [TemplateRecordRef],
    get_field: F,
    bytes_needed: usize,
    _field_idx: usize,
) where
    F: Fn(&TemplateRecordRef) -> u64,
{
    let mut src = refs as *mut [TemplateRecordRef];
    let mut dst = aux as *mut [TemplateRecordRef];

    for byte_idx in 0..bytes_needed {
        let src_slice = unsafe { &*src };
        let dst_slice = unsafe { &mut *dst };

        // Histogram of the current byte of this field.
        let mut counts = [0usize; 256];
        for r in src_slice {
            let byte = ((get_field(r) >> (byte_idx * 8)) & 0xFF) as usize;
            counts[byte] += 1;
        }

        // Exclusive prefix sum.
        let mut total = 0;
        for count in &mut counts {
            let c = *count;
            *count = total;
            total += c;
        }

        // Stable scatter.
        for item in src_slice {
            let byte = ((get_field(item) >> (byte_idx * 8)) & 0xFF) as usize;
            let dest_idx = counts[byte];
            counts[byte] += 1;
            dst_slice[dest_idx] = *item;
        }

        std::mem::swap(&mut src, &mut dst);
    }

    // After an odd number of passes the sorted data lives in `aux`; copy it back.
    if bytes_needed % 2 == 1 {
        let src_slice = unsafe { &*src };
        refs.copy_from_slice(src_slice);
    }
}

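/// Parallel variant of [`radix_sort_template_refs`], using the same
/// chunk-sort-then-merge strategy as [`parallel_radix_sort_record_refs`].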
pub fn parallel_radix_sort_template_refs(refs: &mut [TemplateRecordRef]) {
    use rayon::prelude::*;

    let n = refs.len();
    if n < RADIX_THRESHOLD * 2 {
        radix_sort_template_refs(refs);
        return;
    }

    let n_threads = rayon::current_num_threads();

    if n_threads > 1 && n > 10_000 {
        let chunk_size = n.div_ceil(n_threads);

        // Sort each chunk independently in parallel.
        refs.par_chunks_mut(chunk_size).for_each(|chunk| {
            radix_sort_template_refs(chunk);
        });

        // Reconstruct the chunk boundaries as index ranges for the merge.
        let chunk_boundaries: Vec<_> = refs
            .chunks(chunk_size)
            .scan(0, |pos, chunk| {
                let start = *pos;
                *pos += chunk.len();
                Some(start..*pos)
            })
            .collect();

        merge_sorted_template_chunks(refs, &chunk_boundaries);
    } else {
        radix_sort_template_refs(refs);
    }
}

fn merge_sorted_template_chunks(
    refs: &mut [TemplateRecordRef],
    chunk_ranges: &[std::ops::Range<usize>],
) {
    use crate::sort::radix::{heap_make, heap_sift_down};

    struct HeapEntry {
        key: TemplateKey,
        chunk_idx: usize,
        pos: usize,
    }

    if chunk_ranges.len() <= 1 {
        return;
    }

    let n = refs.len();
    let mut result: Vec<TemplateRecordRef> = Vec::with_capacity(n);

    let mut heap: Vec<HeapEntry> = Vec::with_capacity(chunk_ranges.len());
    for (chunk_idx, range) in chunk_ranges.iter().enumerate() {
        if !range.is_empty() {
            heap.push(HeapEntry { key: refs[range.start].key, chunk_idx, pos: range.start });
        }
    }

    if heap.is_empty() {
        return;
    }

    // Inverted comparator (min-heap on (key, chunk_idx)); ties broken by
    // chunk index to keep the merge stable.
    let lt = |a: &HeapEntry, b: &HeapEntry| -> bool {
        match a.key.cmp(&b.key) {
            std::cmp::Ordering::Greater => true,
            std::cmp::Ordering::Less => false,
            std::cmp::Ordering::Equal => a.chunk_idx > b.chunk_idx,
        }
    };

    heap_make(&mut heap, lt);
    let mut heap_size = heap.len();

    while heap_size > 0 {
        let entry = &heap[0];
        result.push(refs[entry.pos]);

        let chunk_idx = entry.chunk_idx;
        let next_pos = entry.pos + 1;
        let range = &chunk_ranges[chunk_idx];

        if next_pos < range.end {
            heap[0] = HeapEntry { key: refs[next_pos].key, chunk_idx, pos: next_pos };
            heap_sift_down(&mut heap, 0, heap_size, lt);
        } else {
            heap_size -= 1;
            if heap_size > 0 {
                heap.swap(0, heap_size);
                heap_sift_down(&mut heap, 0, heap_size, lt);
            }
        }
    }

    refs.copy_from_slice(&result);
}

fn insertion_sort_template_refs(refs: &mut [TemplateRecordRef]) {
    for i in 1..refs.len() {
        let key = &refs[i].key;
        let insert_pos = refs[..i].partition_point(|r| r.key <= *key);
        if insert_pos < i {
            refs[insert_pos..=i].rotate_right(1);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_packed_coord_key_ordering() {
        // Lower tid sorts first.
        assert!(PackedCoordKey::new(0, 100, false, 10) < PackedCoordKey::new(1, 100, false, 10));

        // Within a tid, lower position sorts first.
        assert!(PackedCoordKey::new(0, 100, false, 10) < PackedCoordKey::new(0, 200, false, 10));

        // Forward strand sorts before reverse at the same coordinate.
        assert!(PackedCoordKey::new(0, 100, false, 10) < PackedCoordKey::new(0, 100, true, 10));

        // Unmapped sorts after everything mapped.
        assert!(PackedCoordKey::new(9, 1_000_000, true, 10) < PackedCoordKey::unmapped());
    }

    #[test]
    fn test_template_key_ordering() {
        // Lower MI value sorts first.
        let k1 = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 0, false);
        let k2 = TemplateKey::new(0, 100, false, 0, 200, false, 0, (2, true), 0, false);
        assert!(k1 < k2);

        // mi.1 = true (stored inverted) sorts before mi.1 = false.
        let ka = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 0, false);
        let kb = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, false), 0, false);
        assert!(ka < kb);

        let lower = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 12345, false);
        let upper = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 12345, true);
        assert!(lower < upper, "is_upper=false should sort before is_upper=true");

        // Name hash dominates the is_upper bit.
        let first_hash_lo =
            TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 100, false);
        let first_hash_hi = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 100, true);
        let second_hash = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 200, false);
        assert!(first_hash_lo < second_hash);
        assert!(first_hash_hi < second_hash);
    }
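
    // Added sketch: verifies the 32-byte round-trip through to_bytes/from_bytes
    // and the RawSortKey write_to/read_from path.
    #[test]
    fn test_template_key_bytes_roundtrip() {
        let key = TemplateKey::new(3, 100, true, 7, 200, false, 2, (99, true), 0xDEAD_BEEF, true);
        let bytes = key.to_bytes();
        assert_eq!(TemplateKey::from_bytes(&bytes), key);

        let mut serialized = Vec::new();
        key.write_to(&mut serialized).unwrap();
        assert_eq!(serialized.len(), 32);
        let decoded = TemplateKey::read_from(&mut serialized.as_slice()).unwrap();
        assert_eq!(decoded, key);
    }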

    #[test]
    fn test_radix_sort_record_refs() {
        let mut refs = vec![
            RecordRef { sort_key: 100, offset: 0, len: 10, padding: 0 },
            RecordRef { sort_key: 50, offset: 100, len: 10, padding: 0 },
            RecordRef { sort_key: 200, offset: 200, len: 10, padding: 0 },
            RecordRef { sort_key: 50, offset: 300, len: 10, padding: 0 },
            RecordRef { sort_key: 1, offset: 400, len: 10, padding: 0 },
        ];

        radix_sort_record_refs(&mut refs);

        assert_eq!(refs[0].sort_key, 1);
        assert_eq!(refs[1].sort_key, 50);
        assert_eq!(refs[2].sort_key, 50);
        assert_eq!(refs[3].sort_key, 100);
        assert_eq!(refs[4].sort_key, 200);

        // Stability: the two equal keys keep their insertion order.
        assert_eq!(refs[1].offset, 100);
        assert_eq!(refs[2].offset, 300);
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_radix_sort_large() {
        let mut refs: Vec<RecordRef> = (0..1000)
            .map(|i| RecordRef {
                sort_key: (999 - i) as u64,
                offset: i as u64 * 100,
                len: 10,
                padding: 0,
            })
            .collect();

        radix_sort_record_refs(&mut refs);

        for (i, r) in refs.iter().enumerate() {
            assert_eq!(r.sort_key, i as u64, "Expected sort_key {i} at index {i}");
        }
    }

    #[test]
    fn test_radix_sort_empty() {
        let mut refs: Vec<RecordRef> = Vec::new();
        radix_sort_record_refs(&mut refs);
        assert!(refs.is_empty());
    }

    #[test]
    fn test_radix_sort_single() {
        let mut refs = vec![RecordRef { sort_key: 42, offset: 0, len: 10, padding: 0 }];
        radix_sort_record_refs(&mut refs);
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].sort_key, 42);
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_radix_sort_all_same_keys() {
        let mut refs: Vec<RecordRef> = (0..100)
            .map(|i| RecordRef { sort_key: 42, offset: i as u64 * 100, len: 10, padding: 0 })
            .collect();

        radix_sort_record_refs(&mut refs);

        for (i, r) in refs.iter().enumerate() {
            assert_eq!(r.sort_key, 42);
            assert_eq!(r.offset, i as u64 * 100, "Stability violated at index {i}");
        }
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_radix_sort_all_zero_keys() {
        let mut refs: Vec<RecordRef> = (0..50)
            .map(|i| RecordRef { sort_key: 0, offset: i as u64 * 100, len: 10, padding: 0 })
            .collect();

        radix_sort_record_refs(&mut refs);

        for (i, r) in refs.iter().enumerate() {
            assert_eq!(r.sort_key, 0);
            assert_eq!(r.offset, i as u64 * 100);
        }
    }

    #[test]
    fn test_radix_sort_max_key() {
        let mut refs = vec![
            RecordRef { sort_key: u64::MAX, offset: 0, len: 10, padding: 0 },
            RecordRef { sort_key: 0, offset: 100, len: 10, padding: 0 },
            RecordRef { sort_key: u64::MAX / 2, offset: 200, len: 10, padding: 0 },
        ];

        radix_sort_record_refs(&mut refs);

        assert_eq!(refs[0].sort_key, 0);
        assert_eq!(refs[1].sort_key, u64::MAX / 2);
        assert_eq!(refs[2].sort_key, u64::MAX);
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_parallel_radix_sort() {
        let mut refs: Vec<RecordRef> = (0..50_000)
            .map(|i| RecordRef {
                sort_key: (49_999 - i) as u64,
                offset: i as u64 * 100,
                len: 10,
                padding: 0,
            })
            .collect();

        parallel_radix_sort_record_refs(&mut refs);

        for (i, r) in refs.iter().enumerate() {
            assert_eq!(r.sort_key, i as u64, "Expected sort_key {i} at index {i}");
        }
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_parallel_radix_sort_stability() {
        // 200 groups of 100 equal keys; offsets record insertion order.
        let mut refs: Vec<RecordRef> = (0..20_000)
            .map(|i| RecordRef {
                sort_key: (i / 100) as u64,
                offset: i as u64,
                len: 10,
                padding: 0,
            })
            .collect();

        parallel_radix_sort_record_refs(&mut refs);

        for i in 1..refs.len() {
            assert!(refs[i - 1].sort_key <= refs[i].sort_key, "Not sorted at index {i}");
            if refs[i - 1].sort_key == refs[i].sort_key {
                assert!(refs[i - 1].offset < refs[i].offset, "Stability violated at index {i}");
            }
        }
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_radix_sort_template_refs_stability() {
        // All refs share one key; offsets record insertion order.
        let key = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 12345, false);

        let mut refs: Vec<TemplateRecordRef> = (0..500)
            .map(|i| TemplateRecordRef {
                key,
                offset: i as u64,
                len: 10,
                padding: 0,
            })
            .collect();

        radix_sort_template_refs(&mut refs);

        for i in 1..refs.len() {
            assert!(
                refs[i - 1].offset < refs[i].offset,
                "Template radix sort stability violated at index {}: offset {} should be < {}",
                i,
                refs[i - 1].offset,
                refs[i].offset
            );
        }
    }

    #[test]
    #[allow(clippy::cast_sign_loss)]
    fn test_parallel_radix_sort_template_refs_stability() {
        // 200 groups of 100 equal keys, distinguished by name hash.
        let mut refs: Vec<TemplateRecordRef> = (0..20_000)
            .map(|i| {
                let group = i / 100;
                let key = TemplateKey::new(
                    0,
                    100,
                    false,
                    0,
                    200,
                    false,
                    0,
                    (1, true),
                    group as u64,
                    false,
                );
                TemplateRecordRef {
                    key,
                    offset: i as u64,
                    len: 10,
                    padding: 0,
                }
            })
            .collect();

        parallel_radix_sort_template_refs(&mut refs);

        for i in 1..refs.len() {
            let prev_key = &refs[i - 1].key;
            let curr_key = &refs[i].key;
            assert!(prev_key <= curr_key, "Not sorted at index {i}");
            if prev_key == curr_key {
                assert!(
                    refs[i - 1].offset < refs[i].offset,
                    "Parallel template sort stability violated at index {}: offset {} should be < {}",
                    i,
                    refs[i - 1].offset,
                    refs[i].offset
                );
            }
        }
    }
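
    // Added sketch: round-trips TemplateInlineHeader through write_to/read_from
    // and checks the serialized size against TEMPLATE_HEADER_SIZE.
    #[test]
    fn test_template_inline_header_roundtrip() {
        let key = TemplateKey::new(1, 10, false, 1, 20, true, 0, (7, true), 42, false);
        let header = TemplateInlineHeader { key, record_len: 123, padding: 0 };

        let mut buf = Vec::new();
        header.write_to(&mut buf);
        assert_eq!(buf.len(), TEMPLATE_HEADER_SIZE);

        let decoded = TemplateInlineHeader::read_from(&buf);
        assert_eq!(decoded.key, key);
        assert_eq!(decoded.record_len, 123);
    }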

    #[test]
    fn test_template_record_buffer_sort_stability() {
        let mut buffer = TemplateRecordBuffer::with_capacity(100, 10000);

        let key = TemplateKey::new(0, 100, false, 0, 200, false, 0, (1, true), 12345, false);

        for i in 0..100u8 {
            // Minimal BAM-style record body; byte 34 (the first seq byte slot)
            // carries `i` so insertion order can be checked after sorting.
            let mut record = vec![
                0, 0, 0, 0, // ref_id = 0
                100, 0, 0, 0, // pos = 100
                2, // l_read_name = 2
                0, // mapq = 0
                0, 0, // bin = 0
                0, 0, // n_cigar_op = 0
                99, 0, // flag = 99
                1, 0, 0, 0, // l_seq = 1
                0, 0, 0, 0, // next_ref_id = 0
                200, 0, 0, 0, // next_pos = 200
                0, 0, 0, 0, // tlen = 0
                b'A', 0, // read name "A\0"
                i, // per-record marker byte
                0xFF,
            ];
            while record.len() < 40 {
                record.push(0);
            }
            buffer.push(&record, key);
        }

        buffer.sort();

        let mut prev_seq_byte = None;
        for rec in buffer.iter_sorted() {
            let seq_byte = rec.get(34).copied().unwrap_or(0);
            if let Some(prev) = prev_seq_byte {
                assert!(
                    prev < seq_byte,
                    "TemplateRecordBuffer::sort() stability violated: {prev} should be < {seq_byte}"
                );
            }
            prev_seq_byte = Some(seq_byte);
        }
    }
}