use crate::{ProcessingOrder, RadixDataType, RadixError, RadixResult, RadixSort, RadixSortable};
impl<T> RadixSort<T>
where
T: RadixSortable,
{
pub(crate) fn sort_strings_generic(&self, slice: &mut [T]) -> RadixResult<()> {
if slice.len() <= 1 {
return Ok(());
}
if T::data_type() != RadixDataType::String {
return Err(RadixError::unsupported_operation(
"sort_strings_generic requires RadixDataType::String",
));
}
let max_len = slice.iter().map(|s| T::string_length(s)).max().unwrap_or(0);
if max_len == 0 {
return Ok(());
}
if self.processing_order == ProcessingOrder::MsbFirst {
self.msd_radix_sort_generic(slice, 0, max_len)
} else {
self.lsb_radix_sort_generic(slice, max_len)
}
}
fn msd_radix_sort_generic(
&self,
slice: &mut [T],
start: usize,
max_len: usize,
) -> RadixResult<()> {
self.msd_radix_sort_recursive(slice, start, slice.len(), 0, max_len)
}
fn msd_radix_sort_recursive(
&self,
slice: &mut [T],
start: usize,
end: usize,
char_index: usize,
max_len: usize,
) -> RadixResult<()> {
if start >= end.saturating_sub(1) || char_index >= max_len {
return Ok(());
}
const RADIX: usize = 256; let mut count = [0usize; RADIX + 1];
for i in start..end {
let byte = T::string_byte_at(&slice[i], char_index) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
let mut output: Vec<T> = Vec::with_capacity(end - start);
for i in start..end {
output.push(slice[i].clone());
}
let count_copy = count;
for i in start..end {
let byte = T::string_byte_at(&slice[i], char_index) as usize;
let pos = count_copy[byte];
output[pos - count_copy[0]] = slice[i].clone();
count[byte] += 1;
}
for (i, item) in output.into_iter().enumerate() {
slice[start + i] = item;
}
let mut prev_end = start;
for i in 0..RADIX + 1 {
let bucket_end = start + count[i] - count[0];
if bucket_end > prev_end + 1 {
self.msd_radix_sort_recursive(
slice,
prev_end,
bucket_end,
char_index + 1,
max_len,
)?;
}
prev_end = bucket_end;
}
Ok(())
}
fn lsb_radix_sort_generic(&self, slice: &mut [T], max_len: usize) -> RadixResult<()> {
let mut buffer: Vec<T> = Vec::with_capacity(slice.len());
for item in slice.iter() {
buffer.push(item.clone());
}
for char_index in (0..max_len).rev() {
self.counting_sort_strings_by_char_generic(slice, &mut buffer, char_index);
slice.swap_with_slice(&mut buffer);
}
Ok(())
}
fn counting_sort_strings_by_char_generic(
&self,
input: &mut [T],
output: &mut [T],
char_index: usize,
) {
const RADIX: usize = 256;
let mut count = [0usize; RADIX + 1];
let len = input.len();
for item in input.iter() {
let byte = T::string_byte_at(item, char_index) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
for i in 0..len {
let byte = T::string_byte_at(&input[i], char_index) as usize;
output[count[byte]] = input[i].clone();
count[byte] += 1;
}
}
}
impl RadixSort<String> {
pub fn sort_strings_msd(&self, slice: &mut [String]) -> RadixResult<()> {
if slice.len() <= 1 {
return Ok(());
}
let max_len = slice.iter().map(|s| s.len()).max().unwrap_or(0);
if max_len == 0 {
return Ok(());
}
if self.processing_order == ProcessingOrder::MsbFirst {
self.msd_radix_sort_strings(slice, 0, slice.len(), 0, max_len)?;
} else {
self.lsb_radix_sort_strings_impl(slice, max_len)?;
}
Ok(())
}
fn msd_radix_sort_strings(
&self,
slice: &mut [String],
start: usize,
end: usize,
char_index: usize,
max_len: usize,
) -> RadixResult<()> {
if start >= end.saturating_sub(1) || char_index >= max_len {
return Ok(());
}
const RADIX: usize = 256; let mut count = [0usize; RADIX + 1];
for i in start..end {
let byte = slice[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
let mut output: Vec<String> = Vec::with_capacity(end - start);
for _ in 0..(end - start) {
output.push(String::new());
}
let count_copy = count;
for i in start..end {
let byte = slice[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
let pos = count_copy[byte];
output[pos - count_copy[0]] = slice[i].clone();
count[byte] += 1;
}
for (i, item) in output.into_iter().enumerate() {
slice[start + i] = item;
}
let mut prev_end = start;
for i in 0..RADIX + 1 {
let bucket_end = start + count[i] - count[0];
if bucket_end > prev_end + 1 {
self.msd_radix_sort_strings(slice, prev_end, bucket_end, char_index + 1, max_len)?;
}
prev_end = bucket_end;
}
Ok(())
}
fn lsb_radix_sort_strings_impl(&self, slice: &mut [String], max_len: usize) -> RadixResult<()> {
let mut buffer: Vec<String> = vec![String::new(); slice.len()];
for char_index in (0..max_len).rev() {
self.counting_sort_strings_by_char_impl(slice, &mut buffer, char_index);
slice.swap_with_slice(&mut buffer);
}
Ok(())
}
fn counting_sort_strings_by_char_impl(
&self,
input: &mut [String],
output: &mut [String],
char_index: usize,
) {
const RADIX: usize = 256;
let mut count = [0usize; RADIX + 1];
let len = input.len();
for item in input.iter() {
let byte = item.as_bytes().get(char_index).copied().unwrap_or(0) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
for i in 0..len {
let byte = input[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
output[count[byte]] = input[i].clone();
count[byte] += 1;
}
}
}
impl RadixSort<&str> {
pub fn sort_str_slice_msd(&self, slice: &mut [&str]) -> RadixResult<()> {
if slice.len() <= 1 {
return Ok(());
}
let max_len = slice.iter().map(|s| s.len()).max().unwrap_or(0);
if max_len == 0 {
return Ok(());
}
if self.processing_order == ProcessingOrder::MsbFirst {
self.msd_radix_sort_str_slice(slice, 0, slice.len(), 0, max_len)?;
} else {
self.lsb_radix_sort_str_slice_impl(slice, max_len)?;
}
Ok(())
}
fn msd_radix_sort_str_slice(
&self,
slice: &mut [&str],
start: usize,
end: usize,
char_index: usize,
max_len: usize,
) -> RadixResult<()> {
if start >= end.saturating_sub(1) || char_index >= max_len {
return Ok(());
}
const RADIX: usize = 256;
let mut count = [0usize; RADIX + 1];
for i in start..end {
let byte = slice[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
let mut output: Vec<&str> = Vec::with_capacity(end - start);
for i in start..end {
output.push(slice[i]);
}
let count_copy = count;
for i in start..end {
let byte = slice[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
let pos = count_copy[byte];
output[pos - count_copy[0]] = slice[i];
count[byte] += 1;
}
for (i, item) in output.into_iter().enumerate() {
slice[start + i] = item;
}
let mut prev_end = start;
for i in 0..RADIX + 1 {
let bucket_end = start + count[i] - count[0];
if bucket_end > prev_end + 1 {
self.msd_radix_sort_str_slice(
slice,
prev_end,
bucket_end,
char_index + 1,
max_len,
)?;
}
prev_end = bucket_end;
}
Ok(())
}
fn lsb_radix_sort_str_slice_impl(&self, slice: &mut [&str], max_len: usize) -> RadixResult<()> {
let mut buffer: Vec<&str> = Vec::with_capacity(slice.len());
for &s in slice.iter() {
buffer.push(s);
}
for char_index in (0..max_len).rev() {
self.counting_sort_str_by_char_impl(slice, &mut buffer, char_index);
slice.swap_with_slice(&mut buffer);
}
Ok(())
}
fn counting_sort_str_by_char_impl<'a>(
&self,
input: &mut [&'a str],
output: &mut [&'a str],
char_index: usize,
) {
const RADIX: usize = 256;
let mut count = [0usize; RADIX + 1];
let len = input.len();
for item in input.iter() {
let byte = item.as_bytes().get(char_index).copied().unwrap_or(0) as usize;
count[byte + 1] += 1;
}
for i in 1..count.len() {
count[i] += count[i - 1];
}
for i in 0..len {
let byte = input[i].as_bytes().get(char_index).copied().unwrap_or(0) as usize;
output[count[byte]] = input[i];
count[byte] += 1;
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{RadixDataType, SortDirection};
#[test]
fn test_sort_strings_basic() {
let mut strings = vec![
"banana".to_string(),
"apple".to_string(),
"cherry".to_string(),
];
let sorter = RadixSort::<String>::new(RadixDataType::String, SortDirection::Ascending);
sorter.sort(&mut strings).unwrap();
assert_eq!(strings, vec!["apple", "banana", "cherry"]);
}
#[test]
fn test_sort_strings_different_lengths() {
let mut strings = vec!["a".to_string(), "abc".to_string(), "ab".to_string()];
let sorter = RadixSort::<String>::default();
sorter.sort(&mut strings).unwrap();
assert_eq!(strings, vec!["a", "ab", "abc"]);
}
#[test]
fn test_sort_strings_descending() {
let mut strings = vec![
"banana".to_string(),
"apple".to_string(),
"cherry".to_string(),
];
let sorter = RadixSort::<String>::new(RadixDataType::String, SortDirection::Descending);
sorter.sort(&mut strings).unwrap();
assert_eq!(strings, vec!["cherry", "banana", "apple"]);
}
#[test]
fn test_sort_strings_empty() {
let mut strings: Vec<String> = vec![];
let sorter = RadixSort::<String>::default();
sorter.sort(&mut strings).unwrap();
assert!(strings.is_empty());
}
#[test]
fn test_sort_strings_with_empty_strings() {
let mut strings = vec!["b".to_string(), "".to_string(), "a".to_string()];
let sorter = RadixSort::<String>::default();
sorter.sort(&mut strings).unwrap();
assert_eq!(strings, vec!["", "a", "b"]);
}
#[test]
fn test_sort_str_slice() {
let mut strings = vec!["banana", "apple", "cherry"];
let sorter = RadixSort::<&str>::default();
sorter.sort(&mut strings).unwrap();
assert_eq!(strings, vec!["apple", "banana", "cherry"]);
}
}