1use cjk_unicodes::{
2 HANGUL_JAMO, HANGUL_SYL, HIRAGANA_AND_KATAKANA, ZH_SC, ZH_SYMBOL, ZH_TC,
3};
4use lazy_static::lazy_static;
5pub mod cjk_unicodes;
/// Expands a list of inclusive `(start, end)` code-point ranges into a flat list.
///
/// Ranges are expanded in the order given and simply concatenated: the result is
/// neither sorted nor de-duplicated, so overlapping ranges yield repeated values.
/// A range whose `start` is greater than its `end` contributes nothing.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(expand_ranges(&[(1, 3), (5, 6)]), vec![1, 2, 3, 5, 6]);
/// ```
pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
    ranges
        .iter()
        // `RangeInclusive<u32>` is already an iterator, so it can be flattened
        // directly without first collecting each range into a temporary `Vec`.
        .flat_map(|&(start, end)| start..=end)
        .collect()
}
12lazy_static! {
13 pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x00FF)]);
19 pub static ref LATIN_EXT_A: Vec<u32> = expand_ranges(&[(0x0100, 0x017F)]);
20 pub static ref LATIN_EXT_B: Vec<u32> = expand_ranges(&[(0x0180, 0x024F)]);
21
22
23
24 pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
25
26 pub static ref CYRILLIC: Vec<u32> = expand_ranges(&[
28 (0x0400, 0x052F),
29 (0x1C80, 0x1C8F),
30 (0x2DE0, 0x2DFF),
31 (0xA640, 0xA69F)
32 ]);
33
34 pub static ref ARABIC: Vec<u32> = expand_ranges(&[
38 (0x0600, 0x06FF),
39 (0x0750, 0x077F),
40 (0x0870, 0x08FF),
41 (0xFB50, 0xFDFF),
42 (0xFE70, 0xFEFF)
43 ]);
44
45 pub static ref BENGALI: Vec<u32> = expand_ranges(&[(0x0980, 0x09FF)]);
49
50 pub static ref DEVANAGARI: Vec<u32> = expand_ranges(&[
54 (0x0900, 0x097F),
55 (0xA8E0, 0xA8FF),
56 (0x11B00, 0x11B5F)
57 ]);
58
59 pub static ref THAI: Vec<u32> = expand_ranges(&[(0x0E00, 0x0E7F)]);
61
62 pub static ref KHMER: Vec<u32> = expand_ranges(&[
64 (0x1780, 0x17FF),
65 (0x19E0, 0x19FF)
66 ]);
67
68 pub static ref TIBETAN: Vec<u32> = expand_ranges(&[(0x0F00, 0x0FFF)]);
72
73 pub static ref MONGOLIAN: Vec<u32> = expand_ranges(&[(0x1800, 0x18AF)]);
75
76 pub static ref TAI_LE: Vec<u32> = expand_ranges(&[(0x1950, 0x197F)]);
78
79 pub static ref TAI_LUE: Vec<u32> = expand_ranges(&[(0x1980, 0x19DF)]);
81
82 pub static ref YI: Vec<u32> = expand_ranges(&[
84 (0xA000, 0xA48F),
85 (0xA490, 0xA4C6)
86 ]);
87
88 pub static ref PHAGS_PA: Vec<u32> = expand_ranges(&[(0xA840, 0xA87F)]);
90
91 pub static ref LISU: Vec<u32> = expand_ranges(&[(0x10C00, 0x10C4F)]);
98
99 pub static ref BUHID: Vec<u32> = expand_ranges(&[(0x1740, 0x175F)]);
101
102 pub static ref MIAO: Vec<u32> = expand_ranges(&[(0x16F00, 0x16F9F)]);
104
105 pub static ref HANI: Vec<u32> = expand_ranges(&[(0x13A0, 0x13F5)]);
107
108 pub static ref LAHU: Vec<u32> = expand_ranges(&[(0x10900, 0x1091F)]);
110
111 pub static ref VA: Vec<u32> = expand_ranges(&[(0x10A00, 0x10A5F)]);
113
114 pub static ref ZHUANG: Vec<u32> = expand_ranges(&[(0x10D30, 0x10D7F)]);
116
117 pub static ref NAXI_DONGBA: Vec<u32> = expand_ranges(&[(0x10FB0, 0x10FDF)]);
119
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// `LATIN` is built from the single range U+0001..=U+00FF, i.e. 255 code points.
    #[test]
    fn latin_has_255_code_points() {
        let latin_len = LATIN.len();
        assert_eq!(latin_len, 255);
    }
}
130
131pub fn create_default_unicode_area() -> [Vec<u32>; 29] {
132 [
133 LATIN.to_vec(),
134 LATIN_EXT_A.to_vec(),
135 LATIN_EXT_B.to_vec(),
136 GREEK.to_vec(),
137 CYRILLIC.to_vec(),
138 ZH_SYMBOL.to_vec(),
140 ZH_SC.to_vec(),
141 ZH_TC.to_vec(),
142 HIRAGANA_AND_KATAKANA.to_vec(),
144 HANGUL_JAMO.to_vec(),
146 HANGUL_SYL.to_vec(),
147 BENGALI.to_vec(),
148 ARABIC.to_vec(),
149 DEVANAGARI.to_vec(),
150 THAI.to_vec(),
151 KHMER.to_vec(),
152 TIBETAN.to_vec(),
153 MONGOLIAN.to_vec(),
154 TAI_LUE.to_vec(),
155 YI.to_vec(),
156 PHAGS_PA.to_vec(),
157 LISU.to_vec(),
158 BUHID.to_vec(),
159 MIAO.to_vec(),
160 HANI.to_vec(),
161 LAHU.to_vec(),
162 VA.to_vec(),
163 ZHUANG.to_vec(),
164 NAXI_DONGBA.to_vec(),
165 ]
166}
/// Returns the name tag of every default Unicode area.
///
/// The tags are listed in the same order as the tables returned by
/// `create_default_unicode_area`, so index `i` here names entry `i` there.
pub fn create_default_unicode_area_tag() -> [&'static str; 29] {
    // Kept as a single constant table so the full list is visible at a glance.
    const AREA_TAGS: [&str; 29] = [
        // Alphabetic scripts.
        "LATIN",
        "LATIN_EXT_A",
        "LATIN_EXT_B",
        "GREEK",
        "CYRILLIC",
        // CJK tables.
        "ZH_SYMBOL",
        "ZH_SC",
        "ZH_TC",
        "HIRAGANA_AND_KATAKANA",
        "HANGUL_JAMO",
        "HANGUL_SYL",
        // Remaining scripts.
        "BENGALI",
        "ARABIC",
        "DEVANAGARI",
        "THAI",
        "KHMER",
        "TIBETAN",
        "MONGOLIAN",
        "TAI_LUE",
        "YI",
        "PHAGS_PA",
        "LISU",
        "BUHID",
        "MIAO",
        "HANI",
        "LAHU",
        "VA",
        "ZHUANG",
        "NAXI_DONGBA",
    ];

    AREA_TAGS
}