1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
use bincode::Options;
use lindera_core::{error::LinderaErrorKind, LinderaResult};

use jpreprocess_core::{error::JPreprocessErrorKind, word_entry::WordEntry, JPreprocessResult};

use crate::DictionarySerializer;

use self::bincode_serializer::SERIALIZE_OPTION;

mod bincode_serializer {
    use bincode::config::*;
    use once_cell::sync::Lazy;

    type Serializer = WithOtherTrailing<
        WithOtherIntEncoding<
            WithOtherEndian<WithOtherLimit<DefaultOptions, Infinite>, LittleEndian>,
            VarintEncoding,
        >,
        AllowTrailing,
    >;
    pub static SERIALIZE_OPTION: Lazy<Serializer> = Lazy::new(|| {
        bincode::config::DefaultOptions::new()
            .with_no_limit()
            .with_little_endian()
            .with_varint_encoding()
            .allow_trailing_bytes()
    });
}

pub struct JPreprocessSerializer;

impl DictionarySerializer for JPreprocessSerializer {
    fn identifier(&self) -> String {
        format!("JPreprocess v{}", env!("CARGO_PKG_VERSION"))
    }
    fn serialize(&self, row: &[String]) -> LinderaResult<Vec<u8>> {
        let mut str_details = row.iter().map(|d| &d[..]).collect::<Vec<&str>>();
        str_details.resize(13, "");
        match WordEntry::load(&str_details[..]) {
            Ok(entry) => SERIALIZE_OPTION
                .serialize(&entry)
                .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err))),
            Err(err) => {
                eprintln!("ERR: jpreprocess parse failed. Word:\n{:?}", &row);
                Err(LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))
            }
        }
    }

    fn deserialize(&self, data: &[u8]) -> JPreprocessResult<WordEntry> {
        let details: WordEntry = SERIALIZE_OPTION
            .deserialize(data)
            .map_err(|err| JPreprocessErrorKind::WordNotFoundError.with_error(err))?;
        Ok(details)
    }
    fn deserialize_debug(&self, data: &[u8]) -> String {
        format!("{:?}", self.deserialize(data))
    }
    fn deserialize_with_string(&self, data: &[u8], string: String) -> LinderaResult<String> {
        let word_entry: WordEntry = SERIALIZE_OPTION
            .deserialize(data)
            .map_err(|err| LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err)))?;
        Ok(word_entry.to_str_vec(string).join(","))
    }
}

#[cfg(test)]
mod tests {
    use crate::DictionarySerializer;

    use super::JPreprocessSerializer;

    const OKIBI: [u8; 33] = [
        0, 10, 2, 12, 27, 1, 9, 227, 130, 170, 227, 130, 173, 227, 131, 147, 3, 144, 1, 141, 1, 59,
        1, 0, 6, 1, 6, 0, 0, 0, 0, 0, 0,
    ];

    #[test]
    fn serialize() {
        let serlializer = JPreprocessSerializer;
        let input_str = "名詞,一般,*,*,*,*,おき火,オキビ,オキビ,0/3,C2,-1";
        let input: Vec<String> = input_str.split(',').map(str::to_string).collect();
        let bytes = serlializer.serialize(&input).unwrap();
        assert_eq!(&bytes, OKIBI.as_slice());

        let deserialized = serlializer
            .deserialize(&bytes)
            .unwrap()
            .to_str_vec("おき火".to_string())
            .join(",");
        assert_eq!(input_str, deserialized);
    }
}