1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
use std::io::{self, BufReader};
use std::fs::{self, File};
use std::path::Path;
use util::*;
use Utf16Char;

/// The space character (U+0020) as a UTF-16 code unit.
pub const SPACE_CHAR: Utf16Char = 0x0020u16;

/// One record of the `char.category` table.
///
/// Each record is built from four consecutive integers of that file, in the
/// order `id`, `length`, `invoke`, `group`.
#[derive(Debug)]
pub struct Category {
    /// First integer of the record.
    pub id: i32,
    /// Second integer of the record.
    pub length: i32,
    /// `true` when the third integer of the record equals `1`.
    pub invoke: bool,
    /// `true` when the fourth integer of the record equals `1`.
    pub group: bool,
}

/// Per-character lookup tables loaded from a data directory.
pub struct CharCategory {
    /// Records read from `char.category`.
    categories: Box<[Category]>,
    /// First half of `code2category`; indexed by character code, each entry
    /// is used as an index into `categories`.
    char2id: Box<[i32]>,
    /// Second half of `code2category`; indexed by character code, each entry
    /// is a bit mask consulted by `is_compatible`.
    eql_masks: Box<[i32]>,
}

impl CharCategory {
    /// Loads the tables from `data_dir`.
    ///
    /// Two files are read: `char.category` (the category records) and
    /// `code2category`, which is consumed as two consecutive integer arrays of
    /// equal length (`char2id` followed by `eql_masks`).
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while inspecting, opening or reading
    /// either file.
    pub fn new(data_dir: &Path) -> io::Result<CharCategory> {
        let table_path = data_dir.join("code2category");
        let byte_len = fs::metadata(&table_path)?.len();
        let mut reader = BufReader::new(File::open(table_path)?);

        // The file holds two arrays back to back, so each one gets half of
        // the integers (presumably 4 bytes per integer, hence `/ 4` -- confirm
        // against `get_int_array`).
        let half_len = (byte_len / 4 / 2) as usize;

        let categories = Self::read_categories(data_dir)?;
        // Order matters: both arrays come from the same sequential reader.
        let char2id = reader.get_int_array(half_len)?;
        let eql_masks = reader.get_int_array(half_len)?;

        Ok(CharCategory {
            categories: categories,
            char2id: char2id,
            eql_masks: eql_masks,
        })
    }

    /// Returns `true` when the masks stored for `code1` and `code2` have at
    /// least one bit in common.
    ///
    /// # Panics
    ///
    /// Panics if either code is outside the bounds of the mask table.
    pub fn is_compatible(&self, code1: Utf16Char, code2: Utf16Char) -> bool {
        let first_mask = self.eql_masks[code1 as usize];
        let second_mask = self.eql_masks[code2 as usize];
        (first_mask & second_mask) != 0
    }

    /// Reads `char.category` from `data_dir` and turns every group of four
    /// integers into a [`Category`]. Trailing integers that do not fill a
    /// complete group of four are ignored.
    fn read_categories(data_dir: &Path) -> io::Result<Box<[Category]>> {
        let raw = read_all_as_int_array(&data_dir.join("char.category"))?;
        let record_count = raw.len() / 4;

        let records: Vec<Category> = (0..record_count)
            .map(|index| {
                let base = index * 4;
                Category {
                    id: raw[base],
                    length: raw[base + 1],
                    invoke: raw[base + 2] == 1,
                    group: raw[base + 3] == 1,
                }
            })
            .collect();

        Ok(records.into_boxed_slice())
    }

    /// Returns the category record associated with `code`.
    ///
    /// # Panics
    ///
    /// Panics if `code` is outside the bounds of the `char2id` table, or if
    /// the id stored there is not a valid index into the category table.
    pub fn category(&self, code: Utf16Char) -> &Category {
        let category_index = self.char2id[code as usize] as usize;
        &self.categories[category_index]
    }
}