lang_unicodes/
lib.rs

1use cjk_unicodes::{
2    HANGUL_JAMO, HANGUL_SYL, HIRAGANA_AND_KATAKANA, ZH_SC, ZH_SYMBOL, ZH_TC,
3};
4use lazy_static::lazy_static;
5pub mod cjk_unicodes;
/// Expands a list of inclusive `(start, end)` code-point ranges into one flat
/// list of code points.
///
/// Ranges are expanded in the order given and simply concatenated: the output
/// is neither sorted nor de-duplicated, so overlapping ranges yield repeated
/// values. A range whose `start` is greater than its `end` contributes nothing.
///
/// # Examples
///
/// ```ignore
/// assert_eq!(expand_ranges(&[(1, 3), (7, 7)]), vec![1, 2, 3, 7]);
/// ```
pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
    ranges
        .iter()
        // `RangeInclusive<u32>` is already an iterator, so it can be flattened
        // directly without first collecting each range into a temporary `Vec`.
        .flat_map(|&(start, end)| start..=end)
        .collect()
}
12lazy_static! {
13    /**
14     * Latin 范围替换
15     * @link https://npmmirror.com/package/@fontsource/noto-sans/files/400.css?version=5.0.22#L61
16     * 0 不归入此,一般 0 是用于占位的
17     */
18    pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x00FF)]);
19    pub static ref LATIN_EXT_A: Vec<u32> = expand_ranges(&[(0x0100, 0x017F)]);
20    pub static ref LATIN_EXT_B: Vec<u32> = expand_ranges(&[(0x0180, 0x024F)]);
21
22
23
24    pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
25
26    /// 西里尔文范围
27    pub static ref CYRILLIC: Vec<u32> = expand_ranges(&[
28        (0x0400, 0x052F),
29        (0x1C80, 0x1C8F),
30        (0x2DE0, 0x2DFF),
31        (0xA640, 0xA69F)
32    ]);
33
34    /**
35     * 阿拉伯文范围
36     */
37    pub static ref ARABIC: Vec<u32> = expand_ranges(&[
38        (0x0600, 0x06FF),
39        (0x0750, 0x077F),
40        (0x0870, 0x08FF),
41        (0xFB50, 0xFDFF),
42        (0xFE70, 0xFEFF)
43    ]);
44
45    /**
46     * 孟加拉语
47     */
48    pub static ref BENGALI: Vec<u32> = expand_ranges(&[(0x0980, 0x09FF)]);
49
50    /**
51     * 天城文
52     */
53    pub static ref DEVANAGARI: Vec<u32> = expand_ranges(&[
54        (0x0900, 0x097F),
55        (0xA8E0, 0xA8FF),
56        (0x11B00, 0x11B5F)
57    ]);
58
59    /** 泰文 */
60    pub static ref THAI: Vec<u32> = expand_ranges(&[(0x0E00, 0x0E7F)]);
61
62    /** 高棉 */
63    pub static ref KHMER: Vec<u32> = expand_ranges(&[
64        (0x1780, 0x17FF),
65        (0x19E0, 0x19FF)
66    ]);
67
68    // 少数民族的文字
69
70    /** 藏文 */
71    pub static ref TIBETAN: Vec<u32> = expand_ranges(&[(0x0F00, 0x0FFF)]);
72
73    /** 蒙古文 */
74    pub static ref MONGOLIAN: Vec<u32> = expand_ranges(&[(0x1800, 0x18AF)]);
75
76    /** 傣文 */
77    pub static ref TAI_LE: Vec<u32> = expand_ranges(&[(0x1950, 0x197F)]);
78
79    /** 西双版纳傣文 */
80    pub static ref TAI_LUE: Vec<u32> = expand_ranges(&[(0x1980, 0x19DF)]);
81
82    /** 彝文 */
83    pub static ref YI: Vec<u32> = expand_ranges(&[
84        (0xA000, 0xA48F),
85        (0xA490, 0xA4C6)
86    ]);
87
88    /** 八思巴文 */
89    pub static ref PHAGS_PA: Vec<u32> = expand_ranges(&[(0xA840, 0xA87F)]);
90
91    /**
92     * 朝鲜文
93     * 采用韩文的解析方式
94     */
95
96    /** 傈僳文 */
97    pub static ref LISU: Vec<u32> = expand_ranges(&[(0x10C00, 0x10C4F)]);
98
99    /** 布依文 */
100    pub static ref BUHID: Vec<u32> = expand_ranges(&[(0x1740, 0x175F)]);
101
102    /** 苗文 */
103    pub static ref MIAO: Vec<u32> = expand_ranges(&[(0x16F00, 0x16F9F)]);
104
105    /** 哈尼文 */
106    pub static ref HANI: Vec<u32> = expand_ranges(&[(0x13A0, 0x13F5)]);
107
108    /** 拉祜文 */
109    pub static ref LAHU: Vec<u32> = expand_ranges(&[(0x10900, 0x1091F)]);
110
111    /** 佤文 */
112    pub static ref VA: Vec<u32> = expand_ranges(&[(0x10A00, 0x10A5F)]);
113
114    /** 壮文 */
115    pub static ref ZHUANG: Vec<u32> = expand_ranges(&[(0x10D30, 0x10D7F)]);
116
117    /** 纳西文 */
118    pub static ref NAXI_DONGBA: Vec<u32> = expand_ranges(&[(0x10FB0, 0x10FDF)]);
119
120}
121
#[cfg(test)]
mod tests {
    use super::LATIN;

    /// `LATIN` is built from the inclusive range U+0001..=U+00FF, which holds
    /// exactly 255 code points (U+0000 is not part of the table).
    #[test]
    fn latin_contains_255_code_points() {
        let code_point_count = LATIN.len();
        assert_eq!(code_point_count, 255);
    }
}
130
131pub fn create_default_unicode_area() -> [Vec<u32>; 29] {
132    [
133        LATIN.to_vec(),
134        LATIN_EXT_A.to_vec(),
135        LATIN_EXT_B.to_vec(),
136        GREEK.to_vec(),
137        CYRILLIC.to_vec(),
138        // 中文处理
139        ZH_SYMBOL.to_vec(),
140        ZH_SC.to_vec(),
141        ZH_TC.to_vec(),
142        // 日文处理
143        HIRAGANA_AND_KATAKANA.to_vec(),
144        // 韩文处理
145        HANGUL_JAMO.to_vec(),
146        HANGUL_SYL.to_vec(),
147        BENGALI.to_vec(),
148        ARABIC.to_vec(),
149        DEVANAGARI.to_vec(),
150        THAI.to_vec(),
151        KHMER.to_vec(),
152        TIBETAN.to_vec(),
153        MONGOLIAN.to_vec(),
154        TAI_LUE.to_vec(),
155        YI.to_vec(),
156        PHAGS_PA.to_vec(),
157        LISU.to_vec(),
158        BUHID.to_vec(),
159        MIAO.to_vec(),
160        HANI.to_vec(),
161        LAHU.to_vec(),
162        VA.to_vec(),
163        ZHUANG.to_vec(),
164        NAXI_DONGBA.to_vec(),
165    ]
166}