lang_unicodes/
cjk_unicodes.rs1use crate::expand_ranges;
2use cn_font_utils::u8_array_to_u16_array;
3use lazy_static::lazy_static;
4
/// Raw contents of `../data/hangul-syl.dat`, embedded into the binary at
/// compile time. Decoded with `u8_array_to_u16_array` to build `HANGUL_SYL`.
static HANGUL_SYL_SOURCE: &[u8] = include_bytes!("../data/hangul-syl.dat");

/// Raw contents of `../data/cn_char_rank.dat`, embedded into the binary at
/// compile time. Decoded with `u8_array_to_u16_array` and split on `0`
/// values by `get_part_from_cn_pkg`.
static CN_CHAR_RANK: &[u8] = include_bytes!("../data/cn_char_rank.dat");
8
9fn get_part_from_cn_pkg(part_no: u8) -> Option<Vec<u32>> {
10 let data = u8_array_to_u16_array(CN_CHAR_RANK);
11 let mut last_index = 0;
12 let mut part_no = part_no as isize;
13
14 for (i, &element) in data.iter().enumerate() {
15 if element == 0 {
16 part_no -= 1;
17 if part_no < 0 {
18 return Some(
19 data[last_index..i]
20 .to_vec()
21 .into_iter()
22 .map(|i| i as u32)
23 .collect::<Vec<u32>>(),
24 );
25 }
26 last_index = i + 1;
27 }
28 }
29
30 if part_no == 0 {
31 return Some(
32 data[last_index..]
33 .to_vec()
34 .into_iter()
35 .map(|i| i as u32)
36 .collect::<Vec<u32>>(),
37 );
38 }
39 None
40}
41
42lazy_static! {
43 pub static ref ZH_SYMBOL: Vec<u32> = get_part_from_cn_pkg(0).unwrap();
44 pub static ref ZH_SC: Vec<u32> = get_part_from_cn_pkg(1).unwrap();
45 pub static ref ZH_TC: Vec<u32> = get_part_from_cn_pkg(2).unwrap();
46 pub static ref HANGUL_SYL: Vec<u32> =
47 u8_array_to_u16_array(HANGUL_SYL_SOURCE)
48 .into_iter()
49 .map(|x| x as u32)
50 .collect();
51 pub static ref HIRAGANA_AND_KATAKANA: Vec<u32> =
52 expand_ranges(&[(0x3040, 0x309F), (0x30A0, 0x30FF)]);
53 pub static ref HANGUL_JAMO: Vec<u32> = expand_ranges(&[(0x1100, 0x11FF)]);
54}
55
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the number of entries in each code-point table and prints the
    /// `ZH_SC` table as a string.
    #[test]
    fn test() {
        assert_eq!(HIRAGANA_AND_KATAKANA.len(), 192);
        assert_eq!(ZH_SYMBOL.len(), 74);
        assert_eq!(ZH_SC.len(), 7000);
        assert_eq!(ZH_TC.len(), 932);
        assert_eq!(HANGUL_SYL.len(), 2026);

        // The `unwrap` doubles as a check: the test panics if any entry is not
        // a valid Unicode scalar value.
        let rendered: String = ZH_SC
            .iter()
            .copied()
            .map(|code_point| std::char::from_u32(code_point).unwrap())
            .collect();
        println!("{}", rendered);
    }
}