// unicode_shaper/shape/cjk.rs

/// Chinese-Japanese-Korean (CJK) code point ranges.
///
/// Each entry is an inclusive `[first, last]` pair. The table is typed as
/// `u16`, so it can only describe ranges within U+0000–U+FFFF; ranges whose
/// values need more than 16 bits are listed in the comments below for
/// reference but are intentionally left out of the array.
///
/// Size: 14 ranges × 2 bounds × 2 bytes = 56 bytes.
pub static CJK: [[u16; 2]; 14] = [
    // CJK Unified Ideographs [Han] (Range: 4E00–9FFF)
    [0x4E00, 0x9FFF],
    // CJK Unified Ideographs Extension A (Range: 3400–4DBF)
    [0x3400, 0x4DBF],
    // NOTE: the following ranges need u32 values and are therefore omitted:
    // CJK Unified Ideographs Extension B (Range: 20000–2A6DF)
    // [0x20000, 0x2A6DF],
    // CJK Unified Ideographs Extension C (Range: 2A700–2B739)
    // [0x2A700, 0x2B739],
    // CJK Unified Ideographs Extension D (Range: 2B740–2B81D)
    // [0x2B740, 0x2B81D],
    // CJK Unified Ideographs Extension E (Range: 2B820–2CEA1)
    // [0x2B820, 0x2CEA1],
    // CJK Unified Ideographs Extension F (Range: 2CEB0–2EBE0)
    // [0x2CEB0, 0x2EBE0],
    // CJK Unified Ideographs Extension G (Range: 30000–3134A)
    // [0x30000, 0x3134A],
    // CJK Unified Ideographs Extension H (Range: 31350–323AF)
    // [0x31350, 0x323AF],
    // CJK Compatibility Ideographs (Range: F900–FAFF)
    [0xF900, 0xFAFF],
    // NOTE: the following range needs u32 values and is therefore omitted:
    // CJK Compatibility Ideographs Supplement (Range: 2F800–2FA1F)
    // [0x2F800, 0x2FA1F],
    // Kangxi Radicals (Range: 2F00–2FDF)
    [0x2F00, 0x2FDF],
    // CJK Radicals Supplement (Range: 2E80–2EFF)
    [0x2E80, 0x2EFF],
    // CJK Strokes (Range: 31C0–31EF)
    [0x31C0, 0x31EF],
    // Ideographic Description Characters (Range: 2FF0–2FFF)
    [0x2FF0, 0x2FFF],
    // CJK Symbols and Punctuation (Range: 3000–303F)
    [0x3000, 0x303F],
    // NOTE: the following range needs u32 values and is therefore omitted:
    // Ideographic Symbols and Punctuation (Range: 16FE0–16FFF)
    // [0x16FE0, 0x16FFF],
    // CJK Compatibility Forms (Range: FE30–FE4F)
    [0xFE30, 0xFE4F],
    // Halfwidth and Fullwidth Forms (Range: FF00–FFEF)
    [0xFF00, 0xFFEF],
    // Small Form Variants (Range: FE50–FE6F)
    [0xFE50, 0xFE6F],
    // Vertical Forms (Range: FE10–FE1F)
    [0xFE10, 0xFE1F],
    // Enclosed CJK Letters and Months (Range: 3200–32FF)
    [0x3200, 0x32FF],
    // CJK Compatibility (Range: 3300–33FF)
    [0x3300, 0x33FF],
];

55/// Check if a character is CJK (Chinese, Japanese, or Korean)
56pub fn is_cjk(c: &u16) -> bool {
57    for arr in CJK {
58        if *c >= arr[0] && *c <= arr[1] {
59            return true;
60        }
61    }
62    false
63}
64
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: one value inside the table, one far outside it.
    #[test]
    fn check_is_cjk() {
        assert!(is_cjk(&0x4E00));
        assert!(!is_cjk(&0x01));
    }

    /// Both ends of a range are inclusive, and gaps between ranges are rejected.
    #[test]
    fn check_is_cjk_range_boundaries() {
        // Last value of CJK Unified Ideographs.
        assert!(is_cjk(&0x9FFF));
        // First and last values of Extension A.
        assert!(is_cjk(&0x3400));
        assert!(is_cjk(&0x4DBF));
        // Gap between Extension A (ends 4DBF) and Unified Ideographs (starts 4E00).
        assert!(!is_cjk(&0x4DC0));
        assert!(!is_cjk(&0x4DFF));
        // Just below CJK Radicals Supplement (starts 2E80).
        assert!(!is_cjk(&0x2E7F));
        // End of Halfwidth and Fullwidth Forms, and the value right after it.
        assert!(is_cjk(&0xFFEF));
        assert!(!is_cjk(&0xFFF0));
    }
}