1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use std::io::{self, BufReader};
use std::fs::{self, File};
use std::path::Path;
use util::*;
use Utf16Char;


pub const SPACE_CHAR: Utf16Char = 0x0020u16;

/// One record of the `char.category` file.
///
/// `CharCategory::read_categories` builds each value from four consecutive
/// `i32`s of that file, in the order `id`, `length`, `invoke`, `group`.
#[derive(Debug)]
pub struct Category {
    /// First integer of the record.
    pub id: i32,
    /// Second integer of the record.
    // NOTE(review): the domain meaning of `length` is not visible in this file —
    // confirm against the dictionary format before relying on it.
    pub length: i32,
    /// `true` when the third integer of the record equals 1.
    pub invoke: bool,
    /// `true` when the fourth integer of the record equals 1.
    pub group: bool
}

/// Character-category lookup tables loaded from a dictionary directory.
///
/// Built by `CharCategory::new` from the `char.category` and `code2category`
/// files found in that directory.
pub struct CharCategory {
    /// Records read from `char.category`.
    categories: Box<[Category]>,
    /// First half of `code2category`; indexed by code unit, each entry is used
    /// as an index into `categories`.
    char2id: Box<[i32]>,
    /// Second half of `code2category`; indexed by code unit. Two code units are
    /// reported compatible when their entries share at least one set bit.
    eql_masks: Box<[i32]>
}

impl CharCategory {
    /// Loads the character-category tables stored under `data_dir`.
    ///
    /// Two files are read: `char.category` (the category records) and
    /// `code2category`, which is treated as two equally sized tables of
    /// integers stored back to back — the code-unit-to-category index table
    /// followed by the compatibility mask table.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while inspecting or opening
    /// `code2category`, while reading `char.category`, or while reading either
    /// table from `code2category`.
    pub fn new(data_dir: &Path) -> io::Result<CharCategory> {
        let table_path = data_dir.join("code2category");
        // File size in bytes / 4 bytes per entry / 2 tables in the file.
        let entries_per_table = (fs::metadata(&table_path)?.len() / 4 / 2) as usize;
        let mut reader = BufReader::new(File::open(table_path)?);

        // Order matters: the category records are loaded first, and `char2id`
        // has to be pulled from the reader before `eql_masks` because the two
        // tables are stored sequentially.
        let categories = Self::read_categories(data_dir)?;
        let char2id = reader.get_int_array(entries_per_table)?;
        let eql_masks = reader.get_int_array(entries_per_table)?;

        Ok(CharCategory {
            categories: categories,
            char2id: char2id,
            eql_masks: eql_masks,
        })
    }

    /// Returns `true` when the masks of `code1` and `code2` have at least one
    /// bit in common.
    ///
    /// # Panics
    ///
    /// Panics if either code unit is not a valid index into the mask table.
    pub fn is_compatible(&self, code1: Utf16Char, code2: Utf16Char) -> bool {
        let first_mask = self.eql_masks[code1 as usize];
        let second_mask = self.eql_masks[code2 as usize];
        (first_mask & second_mask) != 0
    }

    /// Reads `char.category` from `data_dir` and decodes it into records.
    ///
    /// Every record is four consecutive integers: `id`, `length`, and two
    /// flags that count as set only when they are exactly 1. Trailing integers
    /// that do not fill a whole record are ignored.
    fn read_categories(data_dir: &Path) -> io::Result<Box<[Category]>> {
        let raw = read_all_as_int_array(&data_dir.join("char.category"))?;
        let records: Vec<Category> = raw
            .chunks_exact(4)
            .map(|record| Category {
                id: record[0],
                length: record[1],
                invoke: record[2] == 1,
                group: record[3] == 1,
            })
            .collect();
        Ok(records.into_boxed_slice())
    }

    /// Returns the category record associated with `code`.
    ///
    /// # Panics
    ///
    /// Panics if `code` is not a valid index into the index table, or if the
    /// entry found there is not a valid index into the category records.
    pub fn category(&self, code: Utf16Char) -> &Category {
        let category_index = self.char2id[code as usize] as usize;
        &self.categories[category_index]
    }
}