qi_rs/
util.rs

1use wasm_bindgen::prelude::wasm_bindgen;
2
3use crate::{
4    range::{
5        R_CHINESE, R_CLOSE_PARENTHESES, R_COMMON_SYMBOLS, R_JAPANESE, R_KOREAN, R_OPEN_PARENTHESES,
6        R_QUOTES, R_WESTERN_SENTENCE_PUNCTUATIONS,
7    },
8    table::{
9        CJK_COMPATIBILITY, CJK_COMPATIBILITY_FORMS, ENCLOSED_ALPHANUMERICS,
10        ENCLOSED_CJK_LETTERS_AND_MONTHS, GREEK_AND_COPTIC, LATIN_1_SUPPLEMENT, NUMBER_FORMS,
11    },
12};
13
/// Reports whether `c` lies inside any inclusive `(lo, hi)` pair of `r`.
///
/// The lookup is a binary search, so `r` is expected to be sorted in
/// ascending order; an empty table never matches.
#[inline]
pub fn bsearch_range_table(c: char, r: &[(char, char)]) -> bool {
    use core::cmp::Ordering;

    r.binary_search_by(|&(start, end)| {
        if end < c {
            // The whole range sits below `c`, so the search moves right.
            Ordering::Less
        } else if c < start {
            // The whole range sits above `c`, so the search moves left.
            Ordering::Greater
        } else {
            // start <= c <= end: `c` is covered by this range.
            Ordering::Equal
        }
    })
    .is_ok()
}
28
29#[wasm_bindgen]
30pub fn is_chinese(c: char) -> bool {
31    bsearch_range_table(c, R_CHINESE)
32}
33
34#[wasm_bindgen]
35pub fn is_japanese(c: char) -> bool {
36    bsearch_range_table(c, R_JAPANESE) || is_chinese(c)
37}
38
39#[wasm_bindgen]
40pub fn is_korean(c: char) -> bool {
41    bsearch_range_table(c, R_KOREAN)
42}
43
44#[wasm_bindgen]
45pub fn is_cjk(c: char) -> bool {
46    bsearch_range_table(c, R_CHINESE)
47        || bsearch_range_table(c, R_JAPANESE)
48        || bsearch_range_table(c, R_KOREAN)
49}
50
51#[wasm_bindgen]
52pub fn is_number_forms(c: char) -> bool {
53    NUMBER_FORMS.0 <= c && c <= NUMBER_FORMS.1
54}
55
56#[wasm_bindgen]
57pub fn is_cjk_compatibility_forms(c: char) -> bool {
58    CJK_COMPATIBILITY_FORMS.0 <= c && c <= CJK_COMPATIBILITY_FORMS.1
59}
60
61#[wasm_bindgen]
62pub fn is_enclosed_cjk_letters_and_months(c: char) -> bool {
63    ENCLOSED_CJK_LETTERS_AND_MONTHS.0 <= c && c <= ENCLOSED_CJK_LETTERS_AND_MONTHS.1
64}
65
66#[wasm_bindgen]
67pub fn is_latin1_supplement(c: char) -> bool {
68    LATIN_1_SUPPLEMENT.0 <= c && c <= LATIN_1_SUPPLEMENT.1
69}
70
71#[wasm_bindgen]
72pub fn is_greek_and_coptic(c: char) -> bool {
73    GREEK_AND_COPTIC.0 <= c && c <= GREEK_AND_COPTIC.1
74}
75
76#[wasm_bindgen]
77pub fn is_enclosed_alphanumerics(c: char) -> bool {
78    ENCLOSED_ALPHANUMERICS.0 <= c && c <= ENCLOSED_ALPHANUMERICS.1
79}
80
81#[wasm_bindgen]
82pub fn is_cjk_compatibility(c: char) -> bool {
83    CJK_COMPATIBILITY.0 <= c && c <= CJK_COMPATIBILITY.1
84}
85
86#[wasm_bindgen]
87pub fn is_common_symbols(c: char) -> bool {
88    R_COMMON_SYMBOLS.binary_search(&c).is_ok()
89}
90
91#[wasm_bindgen]
92pub fn is_open_parentheses(c: char) -> bool {
93    R_OPEN_PARENTHESES.binary_search(&c).is_ok()
94}
95
96#[wasm_bindgen]
97pub fn is_close_parentheses(c: char) -> bool {
98    R_CLOSE_PARENTHESES.binary_search(&c).is_ok()
99}
100
101#[wasm_bindgen]
102pub fn is_western_sentence_punctuation(c: char) -> bool {
103    R_WESTERN_SENTENCE_PUNCTUATIONS.binary_search(&c).is_ok()
104}
105
106#[wasm_bindgen]
107pub fn is_colon(c: char) -> bool {
108    c == ':'
109}
110
111#[wasm_bindgen]
112pub fn is_quote(c: char) -> bool {
113    R_QUOTES.binary_search(&c).is_ok()
114}
115
116// MARK: TEST
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Exercises the range lookup inside ranges, on their boundaries, in the
    /// gaps between them, and against an empty table.
    #[test]
    fn test_bsearch_range_table() {
        let ranges = &[('a', 'c'), ('e', 'g')];

        assert!(bsearch_range_table('b', ranges));
        assert!(!bsearch_range_table('d', ranges));
        assert!(bsearch_range_table('e', ranges));
        assert!(bsearch_range_table('f', ranges));
        assert!(!bsearch_range_table('h', ranges));

        // Both ends of a range are inclusive.
        assert!(bsearch_range_table('a', ranges));
        assert!(bsearch_range_table('c', ranges));
        assert!(bsearch_range_table('g', ranges));

        // Characters below the first range and an empty table never match.
        assert!(!bsearch_range_table('A', ranges));
        assert!(!bsearch_range_table('a', &[]));
    }

    /// Generates a test asserting that `$func` accepts every character of
    /// `$text`. The failure message names the predicate and the character.
    macro_rules! expect {
        ($name: ident, $func: ident, $text: literal) => {
            #[test]
            fn $name() {
                let text = $text;
                for c in text.chars() {
                    assert!(
                        $func(c),
                        "expected {}({:?}) to be true (U+{:04X})",
                        stringify!($func),
                        c,
                        u32::from(c)
                    );
                }
            }
        };
    }

    /// Generates a test asserting that `$func` rejects every character of
    /// `$text`. The failure message names the predicate and the character.
    macro_rules! expect_not {
        ($name: ident, $func: ident, $text: literal) => {
            #[test]
            fn $name() {
                let text = $text;
                for c in text.chars() {
                    assert!(
                        !$func(c),
                        "expected {}({:?}) to be false (U+{:04X})",
                        stringify!($func),
                        c,
                        u32::from(c)
                    );
                }
            }
        };
    }

    expect!(cjk_unified_ideographs, is_chinese, "你好世界");
    expect_not!(
        test_not_chinese,
        is_chinese,
        "123abc[]./&@^안녕こんにちは😀🔥 \n"
    );
    expect!(cjk_radicals_supplement, is_chinese, "⺀⺩⻯\u{2eff}");
    expect!(kangxi_radicals, is_chinese, "⼀⼆⼋⼗⾭⿓\u{2fdf}");
    expect!(bopomofo, is_chinese, "\u{3100}ㄅㄯ");
    expect!(bopomofo_extended, is_chinese, "ㆠㆿ");
    expect!(cjk_strokes, is_chinese, "㇀\u{31ef}");
    expect_not!(
        cjk_symbols_and_punctuation,
        is_chinese,
        "\u{3000}、〄〲〾\u{303f}"
    );

    expect!(test_is_japanese, is_japanese, "こんにちは世界");
    expect_not!(test_not_japanese, is_japanese, "123abc[]./&@^안녕😀🔥 \n");

    expect!(test_is_korean, is_korean, "안녕하세요세계");
    expect_not!(
        test_not_korean,
        is_korean,
        "123abc[]./&@^こんにちは你好😀🔥 \n"
    );

    // `is_cjk` is the union of the three script tables, so it must accept
    // text from each of them and reject what all three reject.
    expect!(test_is_cjk, is_cjk, "你好こんにちは안녕하세요");
    expect_not!(test_not_cjk, is_cjk, "123abc[]./&@^😀🔥 \n");

    expect!(test_is_number_forms, is_number_forms, "⅐\u{218f}");
    expect!(
        test_is_enclosed_alphanumerics,
        is_enclosed_alphanumerics,
        "①⓿"
    );
    expect!(
        test_is_cjk_compatibility_forms,
        is_cjk_compatibility_forms,
        "︰﹏"
    );
    expect!(
        test_is_cjk_compatibility,
        is_cjk_compatibility,
        "㌀㍿㎠㎪㏿"
    );
    expect!(
        test_is_enclosed_cjk_letters_and_months,
        is_enclosed_cjk_letters_and_months,
        "㈀㋿"
    );
    expect!(
        test_is_latin1_supplement,
        is_latin1_supplement,
        "\u{80}¥©®¼ÿ"
    );
    expect!(test_is_greek_and_coptic, is_greek_and_coptic, "ͰαβγϿ");
    expect!(test_is_common_symbols, is_common_symbols, "#$%&*+-/=@\\|~");
    expect!(test_is_open_parentheses, is_open_parentheses, "([{");
    expect!(test_is_close_parentheses, is_close_parentheses, ")]}");
    expect!(
        test_is_western_sentence_punctuation,
        is_western_sentence_punctuation,
        "!,.;?"
    );
    expect!(test_is_colon, is_colon, ":");
    expect!(test_is_quote, is_quote, "\"'");
}