1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use std::{fs, path::PathBuf};

use byteorder::{ByteOrder, LittleEndian};

use jpreprocess_core::{error::JPreprocessErrorKind, JPreprocessResult};

/// Word storage made of one contiguous byte blob plus a table of offsets
/// into that blob.
pub struct Dictionary {
    /// Concatenated bytes of all entries.
    words_data: Vec<u8>,
    /// One value per entry, used by `get` as the start offset of that entry
    /// within `words_data`; the following value (or the end of `words_data`
    /// for the last entry) is used as the end offset.
    words_idx_data: Vec<u32>,
}

impl Dictionary {
    /// Reads the word data file and the word index file from disk and builds
    /// a [`Dictionary`] from their contents.
    ///
    /// # Errors
    ///
    /// Returns an error built from `JPreprocessErrorKind::Io` if either file
    /// cannot be read.
    pub fn load(words_path: PathBuf, words_idx_path: PathBuf) -> JPreprocessResult<Dictionary> {
        let words_data = Self::read_file(words_path)?;
        let words_idx_data = Self::read_file(words_idx_path)?;
        Ok(Self::load_bin(words_data, words_idx_data))
    }

    /// Builds a [`Dictionary`] from in-memory buffers.
    ///
    /// `words_idx_data` is decoded as a sequence of little-endian `u32`
    /// values, four bytes each. If its length is not a multiple of four, the
    /// incomplete trailing bytes are ignored instead of causing a panic.
    pub fn load_bin(words_data: Vec<u8>, words_idx_data: Vec<u8>) -> Self {
        Self {
            words_data,
            // `chunks_exact` guarantees every chunk holds exactly 4 bytes, so
            // `read_u32` never sees a short buffer.
            words_idx_data: words_idx_data
                .chunks_exact(4)
                .map(LittleEndian::read_u32)
                .collect(),
        }
    }

    /// Returns the raw bytes of the entry at `index`.
    ///
    /// The entry spans from its own offset up to the next entry's offset, or
    /// up to the end of the word data for the last entry.
    ///
    /// Returns `None` when `index` is past the end of the index table, when
    /// an offset does not fit in `usize`, or when the offsets do not describe
    /// a valid range inside the word data (start after end, or end beyond the
    /// data length).
    pub fn get(&self, index: usize) -> Option<&[u8]> {
        let start: usize = (*self.words_idx_data.get(index)?).try_into().ok()?;
        let end: usize = match self.words_idx_data.get(index + 1) {
            Some(&offset) => offset.try_into().ok()?,
            // Last entry: it runs to the end of the data blob.
            None => self.words_data.len(),
        };
        // Checked slicing: inconsistent offsets yield `None` rather than a panic.
        self.words_data.get(start..end)
    }

    /// Reads the whole file at `path`, converting any I/O failure through
    /// `JPreprocessErrorKind::Io.with_error`.
    fn read_file(path: PathBuf) -> JPreprocessResult<Vec<u8>> {
        fs::read(path).map_err(|e| JPreprocessErrorKind::Io.with_error(e))
    }
}

/// Common interface for dictionaries whose entries are looked up by index.
pub trait DictionaryTrait {
    /// Type of the value produced for each entry.
    type StoredType;

    /// Constructs the dictionary from `dir`.
    ///
    /// NOTE(review): `dir` is presumably a directory containing the
    /// dictionary files; the exact layout is up to each implementor.
    ///
    /// Only callable on sized implementors, so it does not prevent the trait
    /// from being used as `dyn DictionaryTrait`.
    fn load(dir: PathBuf) -> JPreprocessResult<Self>
    where
        Self: Sized;

    /// Looks up the entry at `index`, returning `None` if the implementor
    /// has no value for it.
    fn get(&self, index: usize) -> Option<Self::StoredType>;

    /// Returns an iterator over this dictionary that borrows `self`.
    fn iter(&self) -> DictionaryIter<'_, Self::StoredType>;
}

/// Iterator over a [`DictionaryTrait`] implementor that yields the results of
/// `get` for successive indices.
pub struct DictionaryIter<'a, T> {
    /// Dictionary being iterated, held as a trait object.
    dict: &'a dyn DictionaryTrait<StoredType = T>,
    /// Index that the next call to `next` will look up.
    index: usize,
}

impl<'a, T> DictionaryIter<'a, T> {
    pub fn new<K>(dict: &'a K) -> Self
    where
        K: DictionaryTrait<StoredType = T>,
    {
        Self { dict, index: 0 }
    }
}

impl<T> Iterator for DictionaryIter<'_, T> {
    type Item = T;

    /// Yields the entry at the current index and advances by one.
    ///
    /// The index is advanced on every call, including calls where the
    /// dictionary returns `None`.
    fn next(&mut self) -> Option<T> {
        let position = self.index;
        self.index = position + 1;
        self.dict.get(position)
    }
}