lang_unicodes/
lib.rs

1use cjk_unicodes::{
2    HANGUL_JAMO, HANGUL_SYL, HIRAGANA_AND_KATAKANA, ZH_COMMON, ZH_SC, ZH_TC,
3};
4use lazy_static::lazy_static;
5pub mod cjk_unicodes;
/// Expands inclusive `(start, end)` ranges into a flat list of values.
///
/// Every pair contributes each value from `start` through `end`, both ends
/// included, and pairs are emitted in the order they appear in `ranges`.
/// A pair whose `start` is greater than its `end` contributes nothing.
/// Overlapping pairs are not deduplicated.
pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
    ranges
        .iter()
        // `RangeInclusive<u32>` is already an iterator, so it can be flattened
        // directly without first collecting each range into a temporary `Vec`.
        .flat_map(|&(start, end)| start..=end)
        .collect()
}
12lazy_static! {
13    /**
14     * Latin 范围替换
15     * @link https://npmmirror.com/package/@fontsource/noto-sans/files/400.css?version=5.0.22#L61
16     * 0 不归入此,一般 0 是用于占位的
17     */
18    pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x024F)]);
19    pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
20
21    /// 西里尔文范围
22    pub static ref CYRILLIC: Vec<u32> = expand_ranges(&[
23        (0x0400, 0x052F),
24        (0x1C80, 0x1C8F),
25        (0x2DE0, 0x2DFF),
26        (0xA640, 0xA69F)
27    ]);
28
29    /**
30     * 阿拉伯文范围
31     */
32    pub static ref ARABIC: Vec<u32> = expand_ranges(&[
33        (0x0600, 0x06FF),
34        (0x0750, 0x077F),
35        (0x0870, 0x08FF),
36        (0xFB50, 0xFDFF),
37        (0xFE70, 0xFEFF)
38    ]);
39
40    /**
41     * 孟加拉语
42     */
43    pub static ref BENGALI: Vec<u32> = expand_ranges(&[(0x0980, 0x09FF)]);
44
45    /**
46     * 天城文
47     */
48    pub static ref DEVANAGARI: Vec<u32> = expand_ranges(&[
49        (0x0900, 0x097F),
50        (0xA8E0, 0xA8FF),
51        (0x11B00, 0x11B5F)
52    ]);
53
54    /** 泰文 */
55    pub static ref THAI: Vec<u32> = expand_ranges(&[(0x0E00, 0x0E7F)]);
56
57    /** 高棉 */
58    pub static ref KHMER: Vec<u32> = expand_ranges(&[
59        (0x1780, 0x17FF),
60        (0x19E0, 0x19FF)
61    ]);
62
63    // 少数民族的文字
64
65    /** 藏文 */
66    pub static ref TIBETAN: Vec<u32> = expand_ranges(&[(0x0F00, 0x0FFF)]);
67
68    /** 蒙古文 */
69    pub static ref MONGOLIAN: Vec<u32> = expand_ranges(&[(0x1800, 0x18AF)]);
70
71    /** 傣文 */
72    pub static ref TAI_LE: Vec<u32> = expand_ranges(&[(0x1950, 0x197F)]);
73
74    /** 西双版纳傣文 */
75    pub static ref TAI_LUE: Vec<u32> = expand_ranges(&[(0x1980, 0x19DF)]);
76
77    /** 彝文 */
78    pub static ref YI: Vec<u32> = expand_ranges(&[
79        (0xA000, 0xA48F),
80        (0xA490, 0xA4C6)
81    ]);
82
83    /** 八思巴文 */
84    pub static ref PHAGS_PA: Vec<u32> = expand_ranges(&[(0xA840, 0xA87F)]);
85
86    /**
87     * 朝鲜文
88     * 采用韩文的解析方式
89     */
90
91    /** 傈僳文 */
92    pub static ref LISU: Vec<u32> = expand_ranges(&[(0x10C00, 0x10C4F)]);
93
94    /** 布依文 */
95    pub static ref BUHID: Vec<u32> = expand_ranges(&[(0x1740, 0x175F)]);
96
97    /** 苗文 */
98    pub static ref MIAO: Vec<u32> = expand_ranges(&[(0x16F00, 0x16F9F)]);
99
100    /** 哈尼文 */
101    pub static ref HANI: Vec<u32> = expand_ranges(&[(0x13A0, 0x13F5)]);
102
103    /** 拉祜文 */
104    pub static ref LAHU: Vec<u32> = expand_ranges(&[(0x10900, 0x1091F)]);
105
106    /** 佤文 */
107    pub static ref VA: Vec<u32> = expand_ranges(&[(0x10A00, 0x10A5F)]);
108
109    /** 壮文 */
110    pub static ref ZHUANG: Vec<u32> = expand_ranges(&[(0x10D30, 0x10D7F)]);
111
112    /** 纳西文 */
113    pub static ref NAXI_DONGBA: Vec<u32> = expand_ranges(&[(0x10FB0, 0x10FDF)]);
114
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// `LATIN` is built from the single range 0x0001..=0x024F, i.e. 591 values.
    #[test]
    fn test() {
        let latin_count = LATIN.len();
        assert_eq!(latin_count, 591);
    }
}
125
126pub fn create_default_unicode_area() -> [Vec<u32>; 27] {
127    [
128        LATIN.to_vec(),
129        GREEK.to_vec(),
130        CYRILLIC.to_vec(),
131        // 中文处理
132        ZH_COMMON.to_vec(),
133        ZH_SC.to_vec(),
134        ZH_TC.to_vec(),
135        // 日文处理
136        HIRAGANA_AND_KATAKANA.to_vec(),
137        // 韩文处理
138        HANGUL_JAMO.to_vec(),
139        HANGUL_SYL.to_vec(),
140        BENGALI.to_vec(),
141        ARABIC.to_vec(),
142        DEVANAGARI.to_vec(),
143        THAI.to_vec(),
144        KHMER.to_vec(),
145        TIBETAN.to_vec(),
146        MONGOLIAN.to_vec(),
147        TAI_LUE.to_vec(),
148        YI.to_vec(),
149        PHAGS_PA.to_vec(),
150        LISU.to_vec(),
151        BUHID.to_vec(),
152        MIAO.to_vec(),
153        HANI.to_vec(),
154        LAHU.to_vec(),
155        VA.to_vec(),
156        ZHUANG.to_vec(),
157        NAXI_DONGBA.to_vec(),
158    ]
159}