lang_unicodes/
cjk_unicodes.rs

1use crate::expand_ranges;
2use cn_font_utils::u8_array_to_u16_array;
3use lazy_static::lazy_static;
4
5static HANGUL_SYL_SOURCE: &[u8] = include_bytes!("../data/hangul-syl.dat");
6
7static CN_CHAR_RANK: &[u8] = include_bytes!("../data/cn_char_rank.dat");
8
9fn get_part_from_cn_pkg(part_no: u8) -> Option<Vec<u32>> {
10    let data = u8_array_to_u16_array(CN_CHAR_RANK);
11    let mut last_index = 0;
12    let mut part_no = part_no as isize;
13
14    for (i, &element) in data.iter().enumerate() {
15        if element == 0 {
16            part_no -= 1;
17            if part_no < 0 {
18                return Some(
19                    data[last_index..i]
20                        .to_vec()
21                        .into_iter()
22                        .map(|i| i as u32)
23                        .collect::<Vec<u32>>(),
24                );
25            }
26            last_index = i + 1;
27        }
28    }
29
30    if part_no == 0 {
31        return Some(
32            data[last_index..]
33                .to_vec()
34                .into_iter()
35                .map(|i| i as u32)
36                .collect::<Vec<u32>>(),
37        );
38    }
39    None
40}
41
42lazy_static! {
43    pub static ref ZH_SYMBOL: Vec<u32> = get_part_from_cn_pkg(0).unwrap();
44    pub static ref ZH_SC: Vec<u32> = get_part_from_cn_pkg(1).unwrap();
45    pub static ref ZH_TC: Vec<u32> = get_part_from_cn_pkg(2).unwrap();
46    pub static ref HANGUL_SYL: Vec<u32> =
47        u8_array_to_u16_array(HANGUL_SYL_SOURCE)
48            .into_iter()
49            .map(|x| x as u32)
50            .collect();
51    pub static ref HIRAGANA_AND_KATAKANA: Vec<u32> =
52        expand_ranges(&[(0x3040, 0x309F), (0x30A0, 0x30FF)]);
53    pub static ref HANGUL_JAMO: Vec<u32> = expand_ranges(&[(0x1100, 0x11FF)]);
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the number of code points in each table and prints the `ZH_SC`
    /// characters (visible when the test runner does not capture stdout).
    #[test]
    fn test() {
        assert_eq!(HIRAGANA_AND_KATAKANA.len(), 192);
        assert_eq!(ZH_SYMBOL.len(), 74);
        assert_eq!(ZH_SC.len(), 7000);
        assert_eq!(ZH_TC.len(), 932);
        assert_eq!(HANGUL_SYL.len(), 2026);

        // Panics if any entry is not a valid Unicode scalar value.
        let rendered: String = ZH_SC
            .iter()
            .map(|&code_point| char::from_u32(code_point).unwrap())
            .collect();
        println!("{}", rendered);
    }
}