1use wasm_bindgen::prelude::wasm_bindgen;
2
3use crate::{
4 range::{
5 R_CHINESE, R_CLOSE_PARENTHESES, R_COMMON_SYMBOLS, R_JAPANESE, R_KOREAN, R_OPEN_PARENTHESES,
6 R_QUOTES, R_WESTERN_SENTENCE_PUNCTUATIONS,
7 },
8 table::{
9 CJK_COMPATIBILITY, CJK_COMPATIBILITY_FORMS, ENCLOSED_ALPHANUMERICS,
10 ENCLOSED_CJK_LETTERS_AND_MONTHS, GREEK_AND_COPTIC, LATIN_1_SUPPLEMENT, NUMBER_FORMS,
11 },
12};
13
/// Reports whether `c` lies inside any of the inclusive `(low, high)` ranges in `r`.
///
/// The lookup is a binary search, so `r` is expected to be sorted in ascending
/// order with non-overlapping ranges; for an unsorted table the result is
/// unspecified. An empty table never matches.
#[inline]
pub fn bsearch_range_table(c: char, r: &[(char, char)]) -> bool {
    use core::cmp::Ordering;

    // Orders a single range relative to `c`, as `binary_search_by` requires.
    let locate = |&(start, end): &(char, char)| {
        if (start..=end).contains(&c) {
            // `c` falls inside this range: report a hit.
            Ordering::Equal
        } else if c > end {
            // The whole range sits below `c`, so the search continues to the right.
            Ordering::Less
        } else {
            // The whole range sits above `c`, so the search continues to the left.
            Ordering::Greater
        }
    };

    r.binary_search_by(locate).is_ok()
}
28
29#[wasm_bindgen]
30pub fn is_chinese(c: char) -> bool {
31 bsearch_range_table(c, R_CHINESE)
32}
33
34#[wasm_bindgen]
35pub fn is_japanese(c: char) -> bool {
36 bsearch_range_table(c, R_JAPANESE) || is_chinese(c)
37}
38
39#[wasm_bindgen]
40pub fn is_korean(c: char) -> bool {
41 bsearch_range_table(c, R_KOREAN)
42}
43
44#[wasm_bindgen]
45pub fn is_cjk(c: char) -> bool {
46 bsearch_range_table(c, R_CHINESE)
47 || bsearch_range_table(c, R_JAPANESE)
48 || bsearch_range_table(c, R_KOREAN)
49}
50
51#[wasm_bindgen]
52pub fn is_number_forms(c: char) -> bool {
53 NUMBER_FORMS.0 <= c && c <= NUMBER_FORMS.1
54}
55
56#[wasm_bindgen]
57pub fn is_cjk_compatibility_forms(c: char) -> bool {
58 CJK_COMPATIBILITY_FORMS.0 <= c && c <= CJK_COMPATIBILITY_FORMS.1
59}
60
61#[wasm_bindgen]
62pub fn is_enclosed_cjk_letters_and_months(c: char) -> bool {
63 ENCLOSED_CJK_LETTERS_AND_MONTHS.0 <= c && c <= ENCLOSED_CJK_LETTERS_AND_MONTHS.1
64}
65
66#[wasm_bindgen]
67pub fn is_latin1_supplement(c: char) -> bool {
68 LATIN_1_SUPPLEMENT.0 <= c && c <= LATIN_1_SUPPLEMENT.1
69}
70
71#[wasm_bindgen]
72pub fn is_greek_and_coptic(c: char) -> bool {
73 GREEK_AND_COPTIC.0 <= c && c <= GREEK_AND_COPTIC.1
74}
75
76#[wasm_bindgen]
77pub fn is_enclosed_alphanumerics(c: char) -> bool {
78 ENCLOSED_ALPHANUMERICS.0 <= c && c <= ENCLOSED_ALPHANUMERICS.1
79}
80
81#[wasm_bindgen]
82pub fn is_cjk_compatibility(c: char) -> bool {
83 CJK_COMPATIBILITY.0 <= c && c <= CJK_COMPATIBILITY.1
84}
85
86#[wasm_bindgen]
87pub fn is_common_symbols(c: char) -> bool {
88 R_COMMON_SYMBOLS.binary_search(&c).is_ok()
89}
90
91#[wasm_bindgen]
92pub fn is_open_parentheses(c: char) -> bool {
93 R_OPEN_PARENTHESES.binary_search(&c).is_ok()
94}
95
96#[wasm_bindgen]
97pub fn is_close_parentheses(c: char) -> bool {
98 R_CLOSE_PARENTHESES.binary_search(&c).is_ok()
99}
100
101#[wasm_bindgen]
102pub fn is_western_sentence_punctuation(c: char) -> bool {
103 R_WESTERN_SENTENCE_PUNCTUATIONS.binary_search(&c).is_ok()
104}
105
106#[wasm_bindgen]
107pub fn is_colon(c: char) -> bool {
108 c == ':'
109}
110
111#[wasm_bindgen]
112pub fn is_quote(c: char) -> bool {
113 R_QUOTES.binary_search(&c).is_ok()
114}
115
// Unit tests for the character-classification predicates in the parent module.
#[cfg(test)]
pub mod tests {
    use super::*;

    // Covers hits inside ranges, range boundaries, the gap between ranges,
    // values outside the table on both sides, and an empty table.
    #[test]
    fn test_bsearch_range_table() {
        let ranges = &[('a', 'c'), ('e', 'g')];

        assert!(bsearch_range_table('b', ranges));
        assert!(!bsearch_range_table('d', ranges));
        assert!(bsearch_range_table('e', ranges));
        assert!(bsearch_range_table('f', ranges));
        assert!(!bsearch_range_table('h', ranges));

        // Both ends of every range are inclusive.
        assert!(bsearch_range_table('a', ranges));
        assert!(bsearch_range_table('c', ranges));
        assert!(bsearch_range_table('g', ranges));

        // A value below the first range and an empty table never match.
        assert!(!bsearch_range_table('`', ranges));
        assert!(!bsearch_range_table('a', &[]));
    }

    // Generates a test named `$name` asserting that `$func` returns `true`
    // for every character of `$text`. The failure message names the predicate
    // and the offending character so a failing loop iteration is identifiable.
    macro_rules! expect {
        ($name: ident, $func: ident, $text: literal) => {
            #[test]
            fn $name() {
                let text = $text;
                for c in text.chars() {
                    assert!(
                        $func(c),
                        "expected {}({:?}) to be true",
                        stringify!($func),
                        c
                    );
                }
            }
        };
    }

    // Generates a test named `$name` asserting that `$func` returns `false`
    // for every character of `$text`, with the same diagnostic message style
    // as `expect!`.
    macro_rules! expect_not {
        ($name: ident, $func: ident, $text: literal) => {
            #[test]
            fn $name() {
                let text = $text;
                for c in text.chars() {
                    assert!(
                        !$func(c),
                        "expected {}({:?}) to be false",
                        stringify!($func),
                        c
                    );
                }
            }
        };
    }

    // Script predicates: Chinese.
    expect!(cjk_unified_ideographs, is_chinese, "你好世界");
    expect_not!(
        test_not_chinese,
        is_chinese,
        "123abc[]./&@^안녕こんにちは😀🔥 \n"
    );
    expect!(cjk_radicals_supplement, is_chinese, "⺀⺩⻯\u{2eff}");
    expect!(kangxi_radicals, is_chinese, "⼀⼆⼋⼗⾭⿓\u{2fdf}");
    expect!(bopomofo, is_chinese, "\u{3100}ㄅㄯ");
    expect!(bopomofo_extended, is_chinese, "ㆠㆿ");
    expect!(cjk_strokes, is_chinese, "㇀\u{31ef}");
    expect_not!(
        cjk_symbols_and_punctuation,
        is_chinese,
        "\u{3000}、〄〲〾\u{303f}"
    );

    // Script predicates: Japanese (which also accepts Chinese characters).
    expect!(test_is_japanese, is_japanese, "こんにちは世界");
    expect_not!(test_not_japanese, is_japanese, "123abc[]./&@^안녕😀🔥 \n");

    // Script predicates: Korean.
    expect!(test_is_korean, is_korean, "안녕하세요세계");
    expect_not!(
        test_not_korean,
        is_korean,
        "123abc[]./&@^こんにちは你好😀🔥 \n"
    );

    // Single-range predicates.
    expect!(test_is_number_forms, is_number_forms, "⅐\u{218f}");
    expect!(
        test_is_enclosed_alphanumerics,
        is_enclosed_alphanumerics,
        "①⓿"
    );
    expect!(
        test_is_cjk_compatibility_forms,
        is_cjk_compatibility_forms,
        "︰﹏"
    );
    expect!(
        test_is_cjk_compatibility,
        is_cjk_compatibility,
        "㌀㍿㎠㎪㏿"
    );
    expect!(
        test_is_enclosed_cjk_letters_and_months,
        is_enclosed_cjk_letters_and_months,
        "㈀㋿"
    );
    expect!(
        test_is_latin1_supplement,
        is_latin1_supplement,
        "\u{80}¥©®¼ÿ"
    );
    expect!(test_is_greek_and_coptic, is_greek_and_coptic, "ͰαβγϿ");

    // Character-list predicates.
    expect!(test_is_common_symbols, is_common_symbols, "#$%&*+-/=@\\|~");
    expect!(test_is_open_parentheses, is_open_parentheses, "([{");
    expect!(test_is_close_parentheses, is_close_parentheses, ")]}");
    expect!(
        test_is_western_sentence_punctuation,
        is_western_sentence_punctuation,
        "!,.;?"
    );
    expect!(test_is_colon, is_colon, ":");
    expect!(test_is_quote, is_quote, "\"'");
}