// unicode_shaper/shape/cjk.rs
/// Chinese-Japanese-Korean (CJK) characters
///
/// Table of the Unicode block ranges that are treated as CJK. Each entry is an
/// inclusive `[first, last]` pair of 16-bit values. The entries are not sorted
/// by value, so a lookup has to scan the table linearly.
///
/// Blocks located above U+FFFF do not fit in a `u16`. They are kept in the
/// comments below for reference only and are not part of the table.
pub static CJK: [[u16; 2]; 14] = [
    // CJK Unified Ideographs [Han] (Range: 4E00–9FFF)
    [0x4E00, 0x9FFF],
    // CJK Unified Ideographs Extension A (Range: 3400–4DBF)
    [0x3400, 0x4DBF],
    // NOTE: These are u32 values, so they are left out of this u16 table
    // CJK Unified Ideographs Extension B (Range: 20000–2A6DF)
    // [0x20000, 0x2A6DF],
    // CJK Unified Ideographs Extension C (Range: 2A700–2B739)
    // [0x2A700, 0x2B739],
    // CJK Unified Ideographs Extension D (Range: 2B740–2B81D)
    // [0x2B740, 0x2B81D],
    // CJK Unified Ideographs Extension E (Range: 2B820–2CEA1)
    // [0x2B820, 0x2CEA1],
    // CJK Unified Ideographs Extension F (Range: 2CEB0–2EBE0)
    // [0x2CEB0, 0x2EBE0],
    // CJK Unified Ideographs Extension G (Range: 30000–3134A)
    // [0x30000, 0x3134A],
    // CJK Unified Ideographs Extension H (Range: 31350–323AF)
    // [0x31350, 0x323AF],
    // CJK Compatibility Ideographs (Range: F900–FAFF)
    [0xF900, 0xFAFF],
    // NOTE: These are u32 values, so they are left out of this u16 table
    // CJK Compatibility Ideographs Supplement (2F800–2FA1F)
    // [0x2F800, 0x2FA1F],
    // Kangxi Radicals (Range: 2F00–2FDF)
    [0x2F00, 0x2FDF],
    // CJK Radicals Supplement (Range: 2E80–2EFF)
    [0x2E80, 0x2EFF],
    // CJK Strokes (Range: 31C0–31EF)
    [0x31C0, 0x31EF],
    // Ideographic Description Characters (Range: 2FF0–2FFF)
    [0x2FF0, 0x2FFF],
    // CJK Symbols and Punctuation (Range: 3000–303F)
    [0x3000, 0x303F],
    // NOTE: These are u32 values, so they are left out of this u16 table
    // Ideographic Symbols and Punctuation (Range: 16FE0–16FFF)
    // [0x16FE0, 0x16FFF],
    // CJK Compatibility Forms (Range: FE30–FE4F)
    [0xFE30, 0xFE4F],
    // Halfwidth and Fullwidth Forms (Range: FF00–FFEF)
    [0xFF00, 0xFFEF],
    // Small Form Variants (Range: FE50–FE6F)
    [0xFE50, 0xFE6F],
    // Vertical Forms (Range: FE10–FE1F)
    [0xFE10, 0xFE1F],
    // Enclosed CJK Letters and Months (Range: 3200–32FF)
    [0x3200, 0x32FF],
    // CJK Compatibility (Range: 3300–33FF)
    [0x3300, 0x33FF],
];
// 56 bytes (14 ranges × 2 bounds × 2 bytes per u16)
54
55/// Check if a character is CJK (Chinese, Japanese, or Korean)
56pub fn is_cjk(c: &u16) -> bool {
57 for arr in CJK {
58 if *c >= arr[0] && *c <= arr[1] {
59 return true;
60 }
61 }
62 false
63}
64
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: one value inside the table and one far outside it.
    #[test]
    fn check_is_cjk() {
        assert!(is_cjk(&0x4E00));
        assert!(!is_cjk(&0x01));
    }

    /// Both ends of a range are inclusive.
    #[test]
    fn check_is_cjk_range_bounds() {
        assert!(is_cjk(&0x9FFF));
        assert!(is_cjk(&0x3400));
        assert!(is_cjk(&0x4DBF));
        assert!(is_cjk(&0x2E80));
        assert!(is_cjk(&0xFFEF));
    }

    /// Values that sit in the gaps between listed ranges are rejected.
    #[test]
    fn check_is_cjk_gaps() {
        assert!(!is_cjk(&0x2E7F)); // just below 2E80
        assert!(!is_cjk(&0x2FE0)); // between 2FDF and 2FF0
        assert!(!is_cjk(&0x4DC0)); // between 4DBF and 4E00
        assert!(!is_cjk(&0xA000)); // just above 9FFF
        assert!(!is_cjk(&0xFFF0)); // just above FFEF
    }
}
74}