algorithms_fourth/string/
msd.rs

1//! 高位优先的字符串排序
2//!
3//!  #  使用
4//! ```
5//!     use algorithms_fourth::string::msd::sort;
6//!         let mut src = vec!["she","sells","seashells","by","the","sea","shore","the","shells","she","sells","are","surely","seashells"];
7//!         sort(&mut src);
8//!         println!("{:?}",src);
9//! ```
10use crate::insert_sort;
11/// 阀值,在阀值之下插入排序效率更高
12// const M: usize = 15;
13// 这里为了验证高位优先算法的正确性,所以置0
14const M: usize = 0;
15/// 基数
16const R: usize = 256;
17pub fn sort(src: &mut Vec<&str>) {
18    let n = src.len();
19    let mut aux = vec![""; n];
20    sort_in_range(src, &mut aux, 0, n - 1, 0);
21}
22
23fn sort_in_range<'a>(src: &mut [&'a str], aux: &mut [&'a str], lo: usize, hi: usize, d: usize) {
24    if hi <= lo {
25        return;
26    }
27    if hi <= lo + M {
28        // 数据量少时插入排序性价比更高
29        insert_sort(src);
30        return;
31    }
32    let mut count = [0; R + 2];
33    for i in lo..=hi {
34        // 计算频率。偏移2是为了在频率转化为索引后依然能存有下一位排序需要的区间信息
35        // 因为char_at 默认已经偏移了1,所以这里只偏移1
36        count[char_at(src[i], d) + 1] += 1;
37    }
38    for r in 0..R + 1 {
39        // 频率转化为索引:位置r存储小于r的元素个数
40        count[r + 1] += count[r];
41    }
42
43    (lo..=hi).for_each(|i| {
44        // 偏移一位使得,排序结束后count前n个数据刚好是下一步所需要的区间信息
45        // 因为char_at默认已经偏移了1,所以这里无需偏移
46        let pos = char_at(src[i], d);
47        // 注意:count中的值是从0开始的,而i是从lo开始的
48        aux[count[pos]] = src[i];
49        count[pos] += 1;
50    });
51    (lo..=hi).for_each(|i| {
52        // 回写数据。aux的索引是从0开始的
53        src[i] = aux[i - lo];
54    });
55    let d = d + 1;
56    for i in 0..R {
57        if count[i] < count[i + 1] {
58            sort_in_range(src, aux, lo + count[i], lo + count[i + 1] - 1, d);
59        }
60    }
61}
62/// 获取对应位置字符的数字大小
63/// 0:没有
64/// 其他:真实位置+1
65fn char_at(s: &str, d: usize) -> usize {
66    s.as_bytes().get(d).map_or(0, |&f| f as usize + 1)
67}
68#[cfg(test)]
69mod test {
70    use super::sort;
71
72    #[test]
73    fn test() {
74        let mut src = vec![
75            "she",
76            "a",
77            "",
78            "sells",
79            "seashells",
80            "by",
81            "the",
82            "sea",
83            "shore",
84            "the",
85            "shells",
86            "shea",
87            "sells",
88            "are",
89            "surely",
90            "seashells",
91        ];
92        sort(&mut src);
93        println!("{:?}", src);
94    }
95}