1pub static CONVERT_MAP: phf::Map<char, char> = phf::phf_map! {
2 '"' => '"',
3 '#' => '#',
4 '$' => '$',
5 '%' => '%',
6 '&' => '&',
7 ''' => '\'',
8 '*' => '*',
9 '+' => '+',
10 '.' => '.',
11 '/' => '/',
12 '0' => '0',
13 '1' => '1',
14 '2' => '2',
15 '3' => '3',
16 '4' => '4',
17 '5' => '5',
18 '6' => '6',
19 '7' => '7',
20 '8' => '8',
21 '9' => '9',
22 '<' => '<',
23 '=' => '=',
24 '>' => '>',
25 '@' => '@',
26 'A' => 'A',
27 'B' => 'B',
28 'C' => 'C',
29 'D' => 'D',
30 'E' => 'E',
31 'F' => 'F',
32 'G' => 'G',
33 'H' => 'H',
34 'I' => 'I',
35 'J' => 'J',
36 'K' => 'K',
37 'L' => 'L',
38 'M' => 'M',
39 'N' => 'N',
40 'O' => 'O',
41 'P' => 'P',
42 'Q' => 'Q',
43 'R' => 'R',
44 'S' => 'S',
45 'T' => 'T',
46 'U' => 'U',
47 'V' => 'V',
48 'W' => 'W',
49 'X' => 'X',
50 'Y' => 'Y',
51 'Z' => 'Z',
52 '\' => '\\',
53 '^' => '^',
54 '`' => '`',
55 'a' => 'a',
56 'b' => 'b',
57 'c' => 'c',
58 'd' => 'd',
59 'e' => 'e',
60 'f' => 'f',
61 'g' => 'g',
62 'h' => 'h',
63 'i' => 'i',
64 'j' => 'j',
65 'k' => 'k',
66 'l' => 'l',
67 'm' => 'm',
68 'n' => 'n',
69 'o' => 'o',
70 'p' => 'p',
71 'q' => 'q',
72 'r' => 'r',
73 's' => 's',
74 't' => 't',
75 'u' => 'u',
76 'v' => 'v',
77 'w' => 'w',
78 'x' => 'x',
79 'y' => 'y',
80 'z' => 'z',
81 '{' => '{',
82 '|' => '|',
83 '}' => '}',
84 '。' => '。',
85 '「' => '「',
86 '」' => '」',
87 '、' => '、',
88 '・' => '·',
89 '•' => '·',
90 '─' => '—',
91 '―' => '—',
92 '∶' => ':',
93 '‧' => '·',
94 '・' => '·',
95 '﹑' => '、',
96 '〜' => '~',
97 '︰' => ':',
98 '?' => '?',
99 '!' => '!',
100 ',' => ',',
101 ';' => ';',
102 '(' => '(',
103 ')' => ')',
104};
105
106pub static CONVERT_T2S_MAP: phf::Map<char, char> = phf::phf_map! {
108 '妳' => '你',
109 '姊' => '姐',
110 '擡' => '抬',
111 '牠' => '它',
112 '緖' => '绪',
113 '揹' => '背',
114};
115
/// Reports whether `c` is one of the code points this module treats as a CJK
/// ideograph.
///
/// The accepted set is U+3007 plus the inclusive spans listed in the table
/// inside the function body. Everything else — Latin letters, emoji, kana,
/// punctuation — yields `false`.
///
/// Usable in `const` contexts.
#[must_use]
#[inline]
pub const fn is_cjk(c: char) -> bool {
    // Inclusive `(first, last)` spans; a lone code point is a span of length one.
    const SPANS: &[(char, char)] = &[
        ('\u{3007}', '\u{3007}'),   // ideographic number zero
        ('\u{3400}', '\u{4DBF}'),   // CJK Unified Ideographs Extension A
        ('\u{4E00}', '\u{9FFF}'),   // CJK Unified Ideographs
        // Individual ideographs picked out of the U+FA00 compatibility area.
        ('\u{FA0E}', '\u{FA0F}'),
        ('\u{FA11}', '\u{FA11}'),
        ('\u{FA13}', '\u{FA14}'),
        ('\u{FA1F}', '\u{FA1F}'),
        ('\u{FA21}', '\u{FA21}'),
        ('\u{FA23}', '\u{FA24}'),
        ('\u{FA27}', '\u{FA29}'),
        ('\u{20000}', '\u{2A6DF}'), // Extension B
        ('\u{2A700}', '\u{2B739}'), // Extension C
        ('\u{2B740}', '\u{2B81D}'), // Extension D
        ('\u{2B820}', '\u{2CEA1}'), // Extension E
        ('\u{2CEB0}', '\u{2EBE0}'), // Extension F
        ('\u{2EBF0}', '\u{2EE5F}'), // Extension I
        ('\u{30000}', '\u{3134A}'), // Extension G
        ('\u{31350}', '\u{323AF}'), // Extension H
    ];

    // `for` loops are not available in `const fn`, hence the manual index walk.
    let mut idx = 0;
    while idx < SPANS.len() {
        let (first, last) = SPANS[idx];
        if range(c, first, last) {
            return true;
        }
        idx += 1;
    }
    false
}

/// Returns `true` when `c` lies within `min..=max` (both ends inclusive).
///
/// Written with plain comparisons so it can be called from `const fn`.
#[must_use]
#[inline]
const fn range(c: char, min: char, max: char) -> bool {
    min <= c && c <= max
}
146
147static CHINESE_PUNCTUATION: phf::Set<char> = phf::phf_set! {
149 '。',
150 '?',
151 '!',
152 ',',
153 '、',
154 ';',
155 ':',
156 '“',
157 '”',
158 '『',
159 '』',
160 '‘',
161 '’',
162 '「',
163 '」',
164 '(',
165 ')',
166 '[',
167 ']',
168 '〔',
169 '〕',
170 '【',
171 '】',
172 '—',
174 '…',
176 '-',
177 '-',
178 '~',
179 '·',
180 '《',
181 '》',
182 '〈',
183 '〉',
184 '﹏',
186 '_',
188 '.'
189};
190
191#[must_use]
192#[inline]
193pub fn is_chinese_punctuation(c: char) -> bool {
194 CHINESE_PUNCTUATION.contains(&c)
195}
196
197static ENGLISH_PUNCTUATION: phf::Set<char> = phf::phf_set! {
199 '.',
200 '?',
201 '!',
202 ',',
203 ':',
204 '…',
205 ';',
206 '-',
207 '–',
208 '—',
209 '(',
210 ')',
211 '[',
212 ']',
213 '{',
214 '}',
215 '"',
216 '\'',
217 '/',
218};
219
220#[must_use]
221#[inline]
222pub fn is_english_punctuation(c: char) -> bool {
223 ENGLISH_PUNCTUATION.contains(&c)
224}
225
226#[must_use]
227#[inline]
228pub fn is_punctuation(c: char) -> bool {
229 is_chinese_punctuation(c) || is_english_punctuation(c)
230}
231
#[cfg(test)]
mod test {
    use super::*;

    /// Spot-checks `is_cjk` with characters that must be accepted and
    /// characters that must be rejected.
    #[test]
    fn test_is_cjk() {
        let accepted = ['你', '〇', '䀹', '鿃', '\u{9FEB}', '﨧', '𱞈'];
        for &ch in &accepted {
            assert!(is_cjk(ch), "{:?} should be classified as CJK", ch);
        }

        let rejected = ['a', '🍌'];
        for &ch in &rejected {
            assert!(!is_cjk(ch), "{:?} should not be classified as CJK", ch);
        }
    }
}