use crate::utils::*;
use crate::RadixKey;
#[inline]
pub fn out_of_place_sort<T>(
src_bucket: &[T],
dst_bucket: &mut [T],
counts: &[usize; 256],
level: usize,
) where
T: RadixKey + Sized + Send + Copy + Sync,
{
if src_bucket.len() < 2 {
dst_bucket.copy_from_slice(src_bucket);
return;
}
let mut prefix_sums = get_prefix_sums(counts);
let chunks = src_bucket.chunks_exact(8);
let rem = chunks.remainder();
chunks.into_iter().for_each(|chunk| {
let a = chunk[0].get_level(level) as usize;
let b = chunk[1].get_level(level) as usize;
let c = chunk[2].get_level(level) as usize;
let d = chunk[3].get_level(level) as usize;
let e = chunk[4].get_level(level) as usize;
let f = chunk[5].get_level(level) as usize;
let g = chunk[6].get_level(level) as usize;
let h = chunk[7].get_level(level) as usize;
dst_bucket[prefix_sums[a]] = chunk[0];
prefix_sums[a] += 1;
dst_bucket[prefix_sums[b]] = chunk[1];
prefix_sums[b] += 1;
dst_bucket[prefix_sums[c]] = chunk[2];
prefix_sums[c] += 1;
dst_bucket[prefix_sums[d]] = chunk[3];
prefix_sums[d] += 1;
dst_bucket[prefix_sums[e]] = chunk[4];
prefix_sums[e] += 1;
dst_bucket[prefix_sums[f]] = chunk[5];
prefix_sums[f] += 1;
dst_bucket[prefix_sums[g]] = chunk[6];
prefix_sums[g] += 1;
dst_bucket[prefix_sums[h]] = chunk[7];
prefix_sums[h] += 1;
});
rem.iter().for_each(|val| {
let b = val.get_level(level) as usize;
dst_bucket[prefix_sums[b]] = *val;
prefix_sums[b] += 1;
});
}
#[inline]
pub fn out_of_place_sort_with_counts<T>(
src_bucket: &[T],
dst_bucket: &mut [T],
counts: &[usize; 256],
level: usize,
) -> [usize; 256]
where
T: RadixKey + Sized + Send + Copy + Sync,
{
if src_bucket.is_empty() {
return [0usize; 256];
} else if src_bucket.len() == 1 {
let mut counts = [0usize; 256];
dst_bucket.copy_from_slice(src_bucket);
counts[src_bucket[0].get_level(level) as usize] = 1;
return counts;
}
let next_level = level + 1;
let mut prefix_sums = get_prefix_sums(counts);
let mut next_counts_0 = [0usize; 256];
let mut next_counts_1 = [0usize; 256];
let chunks = src_bucket.chunks_exact(8);
let rem = chunks.remainder();
chunks.into_iter().for_each(|chunk| {
let b0 = chunk[0].get_level(level) as usize;
let bn0 = chunk[0].get_level(next_level) as usize;
let b1 = chunk[1].get_level(level) as usize;
let bn1 = chunk[1].get_level(next_level) as usize;
let b2 = chunk[2].get_level(level) as usize;
let bn2 = chunk[2].get_level(next_level) as usize;
let b3 = chunk[3].get_level(level) as usize;
let bn3 = chunk[3].get_level(next_level) as usize;
let b4 = chunk[4].get_level(level) as usize;
let bn4 = chunk[4].get_level(next_level) as usize;
let b5 = chunk[5].get_level(level) as usize;
let bn5 = chunk[5].get_level(next_level) as usize;
let b6 = chunk[6].get_level(level) as usize;
let bn6 = chunk[6].get_level(next_level) as usize;
let b7 = chunk[7].get_level(level) as usize;
let bn7 = chunk[7].get_level(next_level) as usize;
dst_bucket[prefix_sums[b0]] = chunk[0];
prefix_sums[b0] += 1;
next_counts_0[bn0] += 1;
dst_bucket[prefix_sums[b1]] = chunk[1];
prefix_sums[b1] += 1;
next_counts_1[bn1] += 1;
dst_bucket[prefix_sums[b2]] = chunk[2];
prefix_sums[b2] += 1;
next_counts_0[bn2] += 1;
dst_bucket[prefix_sums[b3]] = chunk[3];
prefix_sums[b3] += 1;
next_counts_1[bn3] += 1;
dst_bucket[prefix_sums[b4]] = chunk[4];
prefix_sums[b4] += 1;
next_counts_0[bn4] += 1;
dst_bucket[prefix_sums[b5]] = chunk[5];
prefix_sums[b5] += 1;
next_counts_1[bn5] += 1;
dst_bucket[prefix_sums[b6]] = chunk[6];
prefix_sums[b6] += 1;
next_counts_0[bn6] += 1;
dst_bucket[prefix_sums[b7]] = chunk[7];
prefix_sums[b7] += 1;
next_counts_1[bn7] += 1;
});
rem.iter().for_each(|val| {
let b = val.get_level(level) as usize;
let bn = val.get_level(next_level) as usize;
dst_bucket[prefix_sums[b]] = *val;
prefix_sums[b] += 1;
next_counts_0[bn] += 1;
});
for i in 0..256 {
next_counts_0[i] += next_counts_1[i];
}
next_counts_0
}
#[inline]
pub fn lr_out_of_place_sort<T>(
src_bucket: &[T],
dst_bucket: &mut [T],
counts: &[usize; 256],
level: usize,
) where
T: RadixKey + Sized + Send + Copy + Sync,
{
if src_bucket.len() < 2 {
dst_bucket.copy_from_slice(src_bucket);
return;
}
let mut offsets = get_prefix_sums(counts);
let mut ends = [0usize; 256];
for (i, b) in offsets.iter().enumerate() {
ends[i] = b + counts[i].saturating_sub(1);
}
let mut left = 0;
let mut right = src_bucket.len() - 1;
let pre = src_bucket.len() % 8;
for _ in 0..pre {
let b = src_bucket[right].get_level(level) as usize;
dst_bucket[ends[b]] = src_bucket[right];
ends[b] = ends[b].saturating_sub(1);
right = right.saturating_sub(1);
}
if pre == src_bucket.len() {
return;
}
let end = (src_bucket.len() - pre) / 2;
while left < end {
let bl_0 = src_bucket[left].get_level(level) as usize;
let bl_1 = src_bucket[left + 1].get_level(level) as usize;
let bl_2 = src_bucket[left + 2].get_level(level) as usize;
let bl_3 = src_bucket[left + 3].get_level(level) as usize;
let br_0 = src_bucket[right].get_level(level) as usize;
let br_1 = src_bucket[right - 1].get_level(level) as usize;
let br_2 = src_bucket[right - 2].get_level(level) as usize;
let br_3 = src_bucket[right - 3].get_level(level) as usize;
dst_bucket[offsets[bl_0]] = src_bucket[left];
offsets[bl_0] = offsets[bl_0].wrapping_add(1);
dst_bucket[ends[br_0]] = src_bucket[right];
ends[br_0] = ends[br_0].wrapping_sub(1);
dst_bucket[offsets[bl_1]] = src_bucket[left + 1];
offsets[bl_1] = offsets[bl_1].wrapping_add(1);
dst_bucket[ends[br_1]] = src_bucket[right - 1];
ends[br_1] = ends[br_1].wrapping_sub(1);
dst_bucket[offsets[bl_2]] = src_bucket[left + 2];
offsets[bl_2] = offsets[bl_2].wrapping_add(1);
dst_bucket[ends[br_2]] = src_bucket[right - 2];
ends[br_2] = ends[br_2].wrapping_sub(1);
dst_bucket[offsets[bl_3]] = src_bucket[left + 3];
offsets[bl_3] = offsets[bl_3].wrapping_add(1);
dst_bucket[ends[br_3]] = src_bucket[right - 3];
ends[br_3] = ends[br_3].wrapping_sub(1);
left += 4;
right -= 4;
}
}
#[inline]
pub fn lr_out_of_place_sort_with_counts<T>(
src_bucket: &[T],
dst_bucket: &mut [T],
counts: &[usize; 256],
level: usize,
) -> [usize; 256]
where
T: RadixKey + Sized + Send + Copy + Sync,
{
if src_bucket.is_empty() {
return [0usize; 256];
} else if src_bucket.len() == 1 {
let mut counts = [0usize; 256];
dst_bucket.copy_from_slice(src_bucket);
counts[src_bucket[0].get_level(level) as usize] = 1;
return counts;
}
let next_level = level + 1;
let mut next_counts_0 = [0usize; 256];
let mut next_counts_1 = [0usize; 256];
let mut offsets = get_prefix_sums(counts);
let mut ends = [0usize; 256];
for (i, b) in offsets.iter().enumerate() {
ends[i] = b + counts[i].saturating_sub(1);
}
let mut left = 0;
let mut right = src_bucket.len() - 1;
let pre = src_bucket.len() % 8;
for _ in 0..pre {
let b = src_bucket[right].get_level(level) as usize;
let bn = src_bucket[right].get_level(next_level) as usize;
dst_bucket[ends[b]] = src_bucket[right];
ends[b] = ends[b].wrapping_sub(1);
right = right.wrapping_sub(1);
next_counts_0[bn] += 1;
}
if pre == src_bucket.len() {
return next_counts_0;
}
let end = (src_bucket.len() - pre) / 2;
while left < end {
let bl_0 = src_bucket[left].get_level(level) as usize;
let bl_1 = src_bucket[left + 1].get_level(level) as usize;
let bl_2 = src_bucket[left + 2].get_level(level) as usize;
let bl_3 = src_bucket[left + 3].get_level(level) as usize;
let br_0 = src_bucket[right].get_level(level) as usize;
let br_1 = src_bucket[right - 1].get_level(level) as usize;
let br_2 = src_bucket[right - 2].get_level(level) as usize;
let br_3 = src_bucket[right - 3].get_level(level) as usize;
dst_bucket[offsets[bl_0]] = src_bucket[left];
dst_bucket[ends[br_0]] = src_bucket[right];
ends[br_0] = ends[br_0].wrapping_sub(1);
offsets[bl_0] = offsets[bl_0].wrapping_add(1);
dst_bucket[offsets[bl_1]] = src_bucket[left + 1];
dst_bucket[ends[br_1]] = src_bucket[right - 1];
ends[br_1] = ends[br_1].wrapping_sub(1);
offsets[bl_1] = offsets[bl_1].wrapping_add(1);
dst_bucket[offsets[bl_2]] = src_bucket[left + 2];
dst_bucket[ends[br_2]] = src_bucket[right - 2];
ends[br_2] = ends[br_2].wrapping_sub(1);
offsets[bl_2] = offsets[bl_2].wrapping_add(1);
dst_bucket[offsets[bl_3]] = src_bucket[left + 3];
dst_bucket[ends[br_3]] = src_bucket[right - 3];
ends[br_3] = ends[br_3].wrapping_sub(1);
offsets[bl_3] = offsets[bl_3].wrapping_add(1);
let bnl_0 = src_bucket[left].get_level(next_level) as usize;
let bnl_1 = src_bucket[left + 1].get_level(next_level) as usize;
let bnl_2 = src_bucket[left + 2].get_level(next_level) as usize;
let bnl_3 = src_bucket[left + 3].get_level(next_level) as usize;
let bnr_0 = src_bucket[right].get_level(next_level) as usize;
let bnr_1 = src_bucket[right - 1].get_level(next_level) as usize;
let bnr_2 = src_bucket[right - 2].get_level(next_level) as usize;
let bnr_3 = src_bucket[right - 3].get_level(next_level) as usize;
next_counts_0[bnl_0] += 1;
next_counts_1[bnr_0] += 1;
next_counts_0[bnl_1] += 1;
next_counts_1[bnr_1] += 1;
next_counts_0[bnl_2] += 1;
next_counts_1[bnr_2] += 1;
next_counts_0[bnl_3] += 1;
next_counts_1[bnr_3] += 1;
left += 4;
right -= 4;
}
for i in 0..256 {
next_counts_0[i] += next_counts_1[i];
}
next_counts_0
}