unicode_range/
lib.rs

/// Parser and serializer for CSS `unicode-range` style descriptors such as
/// `U+26`, `U+0-7F` and `U+4??`.
pub struct UnicodeRange;

impl UnicodeRange {
    /// Highest valid Unicode code point. Entries reaching past it are skipped,
    /// which also bounds how many values a single entry can expand to.
    const MAX_CODE_POINT: u32 = 0x10FFFF;

    /// Expands a comma-separated list of unicode-range entries into the
    /// individual code points they cover.
    ///
    /// Supported entry forms (the `U+` / `u+` prefix is optional):
    /// * a single code point: `U+ff65`
    /// * an inclusive interval: `U+0-7F` (the prefix may be repeated on the
    ///   upper bound)
    /// * a wildcard range with trailing `?` digits: `U+4??` (0x400..=0x4FF)
    ///
    /// Code points are returned in the order the entries appear; overlapping
    /// entries produce duplicates. Malformed entries, entries whose upper
    /// bound exceeds U+10FFFF, and intervals whose start is greater than
    /// their end contribute nothing. This function never panics.
    pub fn parse(input: &str) -> Vec<u32> {
        let mut result = Vec::new();

        for token in input.split(',').map(str::trim) {
            if let Some((start, end)) = Self::parse_token(token) {
                // An inverted interval is an empty range and adds nothing.
                result.extend(start..=end);
            }
        }

        result
    }

    /// Renders a set of code points as a compact, comma-separated list.
    ///
    /// The input is sorted and de-duplicated first; each run of consecutive
    /// code points becomes `U+START-END` and each isolated code point becomes
    /// `U+VALUE`, using upper-case hexadecimal without padding. An empty
    /// input yields an empty string.
    pub fn stringify(arr: &[u32]) -> String {
        fn range_string(start: u32, end: u32) -> String {
            if start == end {
                format!("U+{:X}", start)
            } else {
                format!("U+{:X}-{:X}", start, end)
            }
        }

        let mut sorted = arr.to_vec();
        sorted.sort_unstable();
        sorted.dedup();

        let mut code_points = sorted.into_iter();
        let first = match code_points.next() {
            Some(value) => value,
            None => return String::new(),
        };

        let mut pieces: Vec<String> = Vec::new();
        let (mut run_start, mut run_end) = (first, first);

        for current in code_points {
            // Values are strictly increasing here, so the subtraction cannot
            // underflow.
            if current - run_end == 1 {
                run_end = current;
            } else {
                pieces.push(range_string(run_start, run_end));
                run_start = current;
                run_end = current;
            }
        }
        pieces.push(range_string(run_start, run_end));

        pieces.join(",")
    }

    /// Parses one trimmed entry into inclusive `(start, end)` bounds, or
    /// `None` when the entry is malformed or reaches past `MAX_CODE_POINT`.
    fn parse_token(token: &str) -> Option<(u32, u32)> {
        let (start, end) = if let Some((low, high)) = token.split_once('-') {
            // A second '-' ends up in `high` and is rejected by `parse_hex`.
            (Self::parse_hex(low)?, Self::parse_hex(high)?)
        } else if token.contains('?') {
            Self::parse_wildcard(token)?
        } else {
            let code_point = Self::parse_hex(token)?;
            (code_point, code_point)
        };

        if end <= Self::MAX_CODE_POINT {
            Some((start, end))
        } else {
            None
        }
    }

    /// Parses a wildcard entry such as `U+4??` into inclusive bounds.
    /// Wildcards are only accepted as a trailing run.
    fn parse_wildcard(token: &str) -> Option<(u32, u32)> {
        let body = Self::without_prefix(token);
        let fixed = body.trim_end_matches('?');
        // '?' is a single byte, so the byte difference is the wildcard count.
        let wildcards = body.len() - fixed.len();

        // A '?' left in `fixed` means a wildcard in the middle of the value.
        if !fixed.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        // 16^8 no longer fits in a u32; such entries cannot be valid anyway.
        if wildcards >= 8 {
            return None;
        }

        let prefix_value = if fixed.is_empty() {
            0
        } else {
            u32::from_str_radix(fixed, 16).ok()?
        };

        let span = 1u32 << (4 * wildcards as u32);
        let start = prefix_value.checked_mul(span)?;
        let end = start.checked_add(span - 1)?;
        Some((start, end))
    }

    /// Parses a hexadecimal code point with an optional `U+` / `u+` prefix.
    /// Only hex digits may follow the prefix (no sign, no whitespace).
    fn parse_hex(text: &str) -> Option<u32> {
        let digits = Self::without_prefix(text);
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        u32::from_str_radix(digits, 16).ok()
    }

    /// Removes a single leading `U+` or `u+`, if present.
    fn without_prefix(text: &str) -> &str {
        text.strip_prefix("U+")
            .or_else(|| text.strip_prefix("u+"))
            .unwrap_or(text)
    }
}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Code points covered by `U+ff65`, `U+0-7F`, `U+007F-00FF` and `U+4??`,
    /// in that order. 0x7f appears twice because two intervals overlap on it.
    fn sample_code_points() -> Vec<u32> {
        let mut points: Vec<u32> = vec![0xff65];
        points.extend(0x0..=0x7f);
        points.extend(0x7f..=0xff);
        points.extend(0x400..=0x4ff);
        points
    }

    /// Parsing the joined descriptor list yields exactly the sample points.
    #[test]
    fn parse() {
        let input = ["U+ff65", "U+0-7F", "U+007F-00FF", "U+4??"].join(",");

        let mut actual = UnicodeRange::parse(&input);
        let mut expected = sample_code_points();

        // Sort both sides so the comparison does not depend on ordering.
        expected.sort();
        actual.sort();

        assert_eq!(actual.len(), expected.len());
        assert_eq!(actual, expected);
    }

    /// Serializing the sample points merges them into sorted, compact ranges.
    #[test]
    fn stringify() {
        let points = sample_code_points();
        let rendered = UnicodeRange::stringify(&points);
        assert_eq!(rendered, "U+0-FF,U+400-4FF,U+FF65");
    }
}