use cjk_unicodes::{
HANGUL_JAMO, HANGUL_SYL, HIRAGANA_AND_KATAKANA, ZH_SC, ZH_SYMBOL, ZH_TC,
};
use lazy_static::lazy_static;
pub mod cjk_unicodes;
/// Expands a list of inclusive `(start, end)` ranges into one flat list of values.
///
/// Ranges are expanded in the order given and concatenated; both endpoints are
/// included. A pair whose `start` is greater than its `end` contributes nothing.
/// Overlapping ranges are not merged, so a shared value appears once per range
/// that contains it.
pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
    ranges
        .iter()
        // `RangeInclusive<u32>` is itself an iterator, so it can be flattened
        // directly instead of collecting every range into a temporary `Vec`.
        .flat_map(|&(start, end)| start..=end)
        .collect()
}
lazy_static! {
    /// Latin: U+0001..=U+00FF (U+0000 is not included).
    pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x00FF)]);

    /// Latin Extended-A: U+0100..=U+017F.
    pub static ref LATIN_EXT_A: Vec<u32> = expand_ranges(&[(0x0100, 0x017F)]);

    /// Latin Extended-B: U+0180..=U+024F.
    pub static ref LATIN_EXT_B: Vec<u32> = expand_ranges(&[(0x0180, 0x024F)]);

    /// Greek: U+0370..=U+03FF followed by U+1F00..=U+1FFF.
    pub static ref GREEK: Vec<u32> = expand_ranges(&[
        (0x0370, 0x03FF),
        (0x1F00, 0x1FFF),
    ]);

    /// Cyrillic: four ranges, concatenated in the order listed.
    pub static ref CYRILLIC: Vec<u32> = expand_ranges(&[
        (0x0400, 0x052F),
        (0x1C80, 0x1C8F),
        (0x2DE0, 0x2DFF),
        (0xA640, 0xA69F),
    ]);

    /// Arabic: five ranges, concatenated in the order listed.
    pub static ref ARABIC: Vec<u32> = expand_ranges(&[
        (0x0600, 0x06FF),
        (0x0750, 0x077F),
        (0x0870, 0x08FF),
        (0xFB50, 0xFDFF),
        (0xFE70, 0xFEFF),
    ]);

    /// Bengali: U+0980..=U+09FF.
    pub static ref BENGALI: Vec<u32> = expand_ranges(&[(0x0980, 0x09FF)]);

    /// Devanagari: three ranges, the last one outside the Basic Multilingual Plane.
    pub static ref DEVANAGARI: Vec<u32> = expand_ranges(&[
        (0x0900, 0x097F),
        (0xA8E0, 0xA8FF),
        (0x11B00, 0x11B5F),
    ]);

    /// Thai: U+0E00..=U+0E7F.
    pub static ref THAI: Vec<u32> = expand_ranges(&[(0x0E00, 0x0E7F)]);

    /// Khmer: U+1780..=U+17FF followed by U+19E0..=U+19FF.
    pub static ref KHMER: Vec<u32> = expand_ranges(&[
        (0x1780, 0x17FF),
        (0x19E0, 0x19FF),
    ]);

    /// Tibetan: U+0F00..=U+0FFF.
    pub static ref TIBETAN: Vec<u32> = expand_ranges(&[(0x0F00, 0x0FFF)]);

    /// Mongolian: U+1800..=U+18AF.
    pub static ref MONGOLIAN: Vec<u32> = expand_ranges(&[(0x1800, 0x18AF)]);

    /// Tai Le: U+1950..=U+197F.
    pub static ref TAI_LE: Vec<u32> = expand_ranges(&[(0x1950, 0x197F)]);

    /// Tai Lue: U+1980..=U+19DF.
    pub static ref TAI_LUE: Vec<u32> = expand_ranges(&[(0x1980, 0x19DF)]);

    /// Yi: U+A000..=U+A48F followed by U+A490..=U+A4C6.
    pub static ref YI: Vec<u32> = expand_ranges(&[
        (0xA000, 0xA48F),
        (0xA490, 0xA4C6),
    ]);

    /// Phags-pa: U+A840..=U+A87F.
    pub static ref PHAGS_PA: Vec<u32> = expand_ranges(&[(0xA840, 0xA87F)]);

    // NOTE(review): the Unicode block list assigns U+10C00..U+10C4F to Old Turkic
    // and places Lisu at U+A4D0..U+A4FF -- confirm this range is intended.
    /// Lisu: U+10C00..=U+10C4F.
    pub static ref LISU: Vec<u32> = expand_ranges(&[(0x10C00, 0x10C4F)]);

    /// Buhid: U+1740..=U+175F.
    pub static ref BUHID: Vec<u32> = expand_ranges(&[(0x1740, 0x175F)]);

    /// Miao: U+16F00..=U+16F9F.
    pub static ref MIAO: Vec<u32> = expand_ranges(&[(0x16F00, 0x16F9F)]);

    // NOTE(review): U+13A0..U+13F5 lies inside the Cherokee block of the Unicode
    // block list -- confirm this range is intended.
    /// Hani: U+13A0..=U+13F5.
    pub static ref HANI: Vec<u32> = expand_ranges(&[(0x13A0, 0x13F5)]);

    // NOTE(review): U+10900..U+1091F is the Phoenician block in the Unicode block
    // list -- confirm this range is intended.
    /// Lahu: U+10900..=U+1091F.
    pub static ref LAHU: Vec<u32> = expand_ranges(&[(0x10900, 0x1091F)]);

    // NOTE(review): U+10A00..U+10A5F is the Kharoshthi block in the Unicode block
    // list -- confirm this range is intended.
    /// Va: U+10A00..=U+10A5F.
    pub static ref VA: Vec<u32> = expand_ranges(&[(0x10A00, 0x10A5F)]);

    // NOTE(review): U+10D30..U+10D7F overlaps the Hanifi Rohingya and Garay blocks
    // in the Unicode block list -- confirm this range is intended.
    /// Zhuang: U+10D30..=U+10D7F.
    pub static ref ZHUANG: Vec<u32> = expand_ranges(&[(0x10D30, 0x10D7F)]);

    // NOTE(review): U+10FB0..U+10FDF is the Chorasmian block in the Unicode block
    // list -- confirm this range is intended.
    /// Naxi Dongba: U+10FB0..=U+10FDF.
    pub static ref NAXI_DONGBA: Vec<u32> = expand_ranges(&[(0x10FB0, 0x10FDF)]);
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `LATIN` expands U+0001..=U+00FF, which is 255 code points.
    #[test]
    fn test() {
        let latin_len = LATIN.len();
        assert_eq!(latin_len, 255);
    }
}
/// Returns an owned copy of each default Unicode area, in a fixed order.
///
/// Every entry is produced by calling `.to_vec()` on the corresponding table,
/// so each call allocates 29 fresh vectors. The order is the same as the order
/// of the names returned by [`create_default_unicode_area_tag`].
///
/// NOTE(review): `TAI_LE` is defined in this file but is not part of this list
/// (nor of the tag list) -- confirm whether the omission is intentional.
pub fn create_default_unicode_area() -> [Vec<u32>; 29] {
    // Expands to `[A.to_vec(), B.to_vec(), ...]`, preserving the listed order.
    macro_rules! owned_areas {
        ($($area:ident),+ $(,)?) => {
            [$($area.to_vec()),+]
        };
    }

    owned_areas![
        LATIN,
        LATIN_EXT_A,
        LATIN_EXT_B,
        GREEK,
        CYRILLIC,
        ZH_SYMBOL,
        ZH_SC,
        ZH_TC,
        HIRAGANA_AND_KATAKANA,
        HANGUL_JAMO,
        HANGUL_SYL,
        BENGALI,
        ARABIC,
        DEVANAGARI,
        THAI,
        KHMER,
        TIBETAN,
        MONGOLIAN,
        TAI_LUE,
        YI,
        PHAGS_PA,
        LISU,
        BUHID,
        MIAO,
        HANI,
        LAHU,
        VA,
        ZHUANG,
        NAXI_DONGBA,
    ]
}
/// Returns the names of the default Unicode areas, in a fixed order.
///
/// The array has 29 entries; entry `i` names entry `i` of the array returned by
/// [`create_default_unicode_area`].
pub fn create_default_unicode_area_tag() -> [&'static str; 29] {
    // Kept as a constant table so the names live in one place; the array is
    // `Copy`, so returning it hands the caller its own copy.
    const TAGS: [&str; 29] = [
        "LATIN",
        "LATIN_EXT_A",
        "LATIN_EXT_B",
        "GREEK",
        "CYRILLIC",
        "ZH_SYMBOL",
        "ZH_SC",
        "ZH_TC",
        "HIRAGANA_AND_KATAKANA",
        "HANGUL_JAMO",
        "HANGUL_SYL",
        "BENGALI",
        "ARABIC",
        "DEVANAGARI",
        "THAI",
        "KHMER",
        "TIBETAN",
        "MONGOLIAN",
        "TAI_LUE",
        "YI",
        "PHAGS_PA",
        "LISU",
        "BUHID",
        "MIAO",
        "HANI",
        "LAHU",
        "VA",
        "ZHUANG",
        "NAXI_DONGBA",
    ];

    TAGS
}