1use cjk_unicodes::{
2 HANGUL_JAMO, HANGUL_SYL, HIRAGANA_AND_KATAKANA, ZH_COMMON, ZH_SC, ZH_TC,
3};
4use lazy_static::lazy_static;
5pub mod cjk_unicodes;
/// Expands a list of inclusive `(start, end)` ranges into one flat list of values.
///
/// The ranges are processed in the order given and each one contributes
/// `start, start + 1, ..., end` (both bounds included). No sorting or
/// de-duplication is performed, so overlapping ranges produce repeated values.
/// A pair whose `start` is greater than its `end` contributes nothing.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(expand_ranges(&[(1, 3), (10, 11)]), vec![1, 2, 3, 10, 11]);
/// ```
pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
    ranges
        .iter()
        // `RangeInclusive<u32>` is already an iterator, so it can be flattened
        // directly instead of being collected into a temporary `Vec` first.
        .flat_map(|&(start, end)| start..=end)
        .collect()
}
12lazy_static! {
13 pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x024F)]);
19 pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
20
21 pub static ref CYRILLIC: Vec<u32> = expand_ranges(&[
23 (0x0400, 0x052F),
24 (0x1C80, 0x1C8F),
25 (0x2DE0, 0x2DFF),
26 (0xA640, 0xA69F)
27 ]);
28
29 pub static ref ARABIC: Vec<u32> = expand_ranges(&[
33 (0x0600, 0x06FF),
34 (0x0750, 0x077F),
35 (0x0870, 0x08FF),
36 (0xFB50, 0xFDFF),
37 (0xFE70, 0xFEFF)
38 ]);
39
40 pub static ref BENGALI: Vec<u32> = expand_ranges(&[(0x0980, 0x09FF)]);
44
45 pub static ref DEVANAGARI: Vec<u32> = expand_ranges(&[
49 (0x0900, 0x097F),
50 (0xA8E0, 0xA8FF),
51 (0x11B00, 0x11B5F)
52 ]);
53
54 pub static ref THAI: Vec<u32> = expand_ranges(&[(0x0E00, 0x0E7F)]);
56
57 pub static ref KHMER: Vec<u32> = expand_ranges(&[
59 (0x1780, 0x17FF),
60 (0x19E0, 0x19FF)
61 ]);
62
63 pub static ref TIBETAN: Vec<u32> = expand_ranges(&[(0x0F00, 0x0FFF)]);
67
68 pub static ref MONGOLIAN: Vec<u32> = expand_ranges(&[(0x1800, 0x18AF)]);
70
71 pub static ref TAI_LE: Vec<u32> = expand_ranges(&[(0x1950, 0x197F)]);
73
74 pub static ref TAI_LUE: Vec<u32> = expand_ranges(&[(0x1980, 0x19DF)]);
76
77 pub static ref YI: Vec<u32> = expand_ranges(&[
79 (0xA000, 0xA48F),
80 (0xA490, 0xA4C6)
81 ]);
82
83 pub static ref PHAGS_PA: Vec<u32> = expand_ranges(&[(0xA840, 0xA87F)]);
85
86 pub static ref LISU: Vec<u32> = expand_ranges(&[(0x10C00, 0x10C4F)]);
93
94 pub static ref BUHID: Vec<u32> = expand_ranges(&[(0x1740, 0x175F)]);
96
97 pub static ref MIAO: Vec<u32> = expand_ranges(&[(0x16F00, 0x16F9F)]);
99
100 pub static ref HANI: Vec<u32> = expand_ranges(&[(0x13A0, 0x13F5)]);
102
103 pub static ref LAHU: Vec<u32> = expand_ranges(&[(0x10900, 0x1091F)]);
105
106 pub static ref VA: Vec<u32> = expand_ranges(&[(0x10A00, 0x10A5F)]);
108
109 pub static ref ZHUANG: Vec<u32> = expand_ranges(&[(0x10D30, 0x10D7F)]);
111
112 pub static ref NAXI_DONGBA: Vec<u32> = expand_ranges(&[(0x10FB0, 0x10FDF)]);
114
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// `LATIN` is built from the single range U+0001..=U+024F, which holds
    /// 0x024F = 591 code points.
    #[test]
    fn test() {
        let latin_len = LATIN.len();
        assert_eq!(latin_len, 591);
    }
}
125
126pub fn create_default_unicode_area() -> [Vec<u32>; 27] {
127 [
128 LATIN.to_vec(),
129 GREEK.to_vec(),
130 CYRILLIC.to_vec(),
131 ZH_COMMON.to_vec(),
133 ZH_SC.to_vec(),
134 ZH_TC.to_vec(),
135 HIRAGANA_AND_KATAKANA.to_vec(),
137 HANGUL_JAMO.to_vec(),
139 HANGUL_SYL.to_vec(),
140 BENGALI.to_vec(),
141 ARABIC.to_vec(),
142 DEVANAGARI.to_vec(),
143 THAI.to_vec(),
144 KHMER.to_vec(),
145 TIBETAN.to_vec(),
146 MONGOLIAN.to_vec(),
147 TAI_LUE.to_vec(),
148 YI.to_vec(),
149 PHAGS_PA.to_vec(),
150 LISU.to_vec(),
151 BUHID.to_vec(),
152 MIAO.to_vec(),
153 HANI.to_vec(),
154 LAHU.to_vec(),
155 VA.to_vec(),
156 ZHUANG.to_vec(),
157 NAXI_DONGBA.to_vec(),
158 ]
159}