/// Converts between CSS-style `unicode-range` text (for example
/// `U+ff65,U+0-7F,U+4??`) and flat lists of code points.
pub struct UnicodeRange;

impl UnicodeRange {
    /// Expands a comma-separated range list into individual code points.
    ///
    /// Each item (surrounding whitespace is trimmed) is one of:
    ///
    /// - a single value, e.g. `U+26`;
    /// - an inclusive interval, e.g. `U+0-7F` (the prefix on the upper bound
    ///   is optional);
    /// - a wildcard, e.g. `U+4??`, where every trailing `?` stands for any
    ///   hex digit.
    ///
    /// The `U+` prefix is optional and may be written in lower case.
    ///
    /// The output follows the input order and is not deduplicated, so
    /// overlapping items produce repeated values. Items that are malformed,
    /// that place `?` anywhere but at the end, or that reach past `U+10FFFF`
    /// are skipped silently. An interval whose lower bound is greater than
    /// its upper bound contributes nothing.
    pub fn parse(input: &str) -> Vec<u32> {
        // Largest Unicode code point. Enforcing it also bounds the amount of
        // memory a single item can make this function allocate.
        const MAX_CODE_POINT: u32 = 0x10_FFFF;
        // Six or more wildcard digits always span past `MAX_CODE_POINT`;
        // capping here keeps the shift below from overflowing `u32`.
        const MAX_WILDCARDS: usize = 5;

        // Drops a single leading `U+` / `u+` marker when present.
        fn strip_marker(text: &str) -> &str {
            if text.starts_with("U+") || text.starts_with("u+") {
                // The marker is two ASCII bytes, so byte index 2 is a char boundary.
                &text[2..]
            } else {
                text
            }
        }

        // Parses bare hexadecimal digits. `from_str_radix` on its own would
        // also accept a leading `+` sign, hence the explicit digit check.
        fn hex_value(digits: &str) -> Option<u32> {
            if digits.bytes().all(|b| b.is_ascii_hexdigit()) {
                u32::from_str_radix(digits, 16).ok()
            } else {
                None
            }
        }

        // Parses a hexadecimal value with an optional `U+` marker.
        fn parse_hex(text: &str) -> Option<u32> {
            hex_value(strip_marker(text))
        }

        // Resolves one list item to its inclusive `(start, end)` bounds.
        fn parse_bounds(item: &str) -> Option<(u32, u32)> {
            if item.contains('-') {
                // A second `-` ends up in the upper half and fails hex parsing.
                let mut sides = item.splitn(2, '-');
                let low = parse_hex(sides.next()?)?;
                let high = parse_hex(sides.next()?)?;
                return Some((low, high));
            }

            if item.contains('?') {
                let body = strip_marker(item);
                let fixed = body.trim_end_matches('?');
                let wildcards = body.len() - fixed.len();
                if wildcards > MAX_WILDCARDS {
                    return None;
                }
                // A `?` left inside `fixed` is not a hex digit, so wildcards
                // that are not trailing are rejected by `hex_value`.
                let prefix = if fixed.is_empty() {
                    0
                } else {
                    hex_value(fixed)?
                };
                let span = 1u32 << (4 * wildcards);
                let start = prefix.checked_mul(span)?;
                let end = start.checked_add(span - 1)?;
                return Some((start, end));
            }

            parse_hex(item).map(|code_point| (code_point, code_point))
        }

        let mut result = Vec::new();
        for item in input.split(',').map(str::trim) {
            if let Some((start, end)) = parse_bounds(item) {
                if end <= MAX_CODE_POINT {
                    result.extend(start..=end);
                }
            }
        }
        result
    }

    /// Serializes code points into the most compact range list.
    ///
    /// The input may be unsorted and may contain duplicates. Values are
    /// sorted and deduplicated, runs of consecutive values collapse into
    /// `U+START-END`, isolated values are written as `U+VALUE`, and the
    /// pieces are joined with `,`. Hex digits are upper case without
    /// padding. An empty input yields an empty string.
    pub fn stringify(arr: &[u32]) -> String {
        // Formats one inclusive run; a one-element run has no `-END` part.
        fn range_string(start: u32, end: u32) -> String {
            if start == end {
                format!("U+{:X}", start)
            } else {
                format!("U+{:X}-{:X}", start, end)
            }
        }

        let mut sorted = arr.to_vec();
        sorted.sort_unstable();
        sorted.dedup();

        let mut pieces: Vec<String> = Vec::new();
        // Bounds of the run currently being extended, if any.
        let mut run: Option<(u32, u32)> = None;

        for value in sorted {
            run = match run {
                // Sorted and deduplicated, so `value > end` and the
                // subtraction cannot underflow.
                Some((start, end)) if value - end == 1 => Some((start, value)),
                Some((start, end)) => {
                    pieces.push(range_string(start, end));
                    Some((value, value))
                }
                None => Some((value, value)),
            };
        }

        if let Some((start, end)) = run {
            pieces.push(range_string(start, end));
        }

        pieces.join(",")
    }
}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the code points shared by both tests, in insertion order:
    /// `0xff65`, then `0x0..=0x7f`, `0x7f..=0xff` and `0x400..=0x4ff`.
    /// `0x7f` appears twice on purpose because the two intervals overlap.
    fn expected_code_points() -> Vec<u32> {
        let mut points: Vec<u32> = vec![0xff65];
        points.extend(0x0..=0x7f);
        points.extend(0x7f..=0xff);
        points.extend(0x400..=0x4ff);
        points
    }

    /// Parsing a mixed list yields every code point of every item,
    /// duplicates included.
    #[test]
    fn parse() {
        let input = ["U+ff65", "U+0-7F", "U+007F-00FF", "U+4??"].join(",");

        let mut actual = UnicodeRange::parse(&input);
        let mut expected = expected_code_points();

        // Order is irrelevant for this comparison, so normalize both sides.
        expected.sort();
        actual.sort();

        assert_eq!(actual.len(), expected.len());
        assert_eq!(actual, expected);
    }

    /// Stringifying unsorted input with a duplicate collapses it into
    /// sorted, merged ranges.
    #[test]
    fn stringify() {
        let points = expected_code_points();
        let rendered = UnicodeRange::stringify(&points);
        assert_eq!(rendered, "U+0-FF,U+400-4FF,U+FF65");
    }
}