1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use lindera_core::{error::LinderaErrorKind, LinderaResult};

pub trait DictionarySerializer {
    fn identifier(&self) -> String;
    fn serialize(&self, row: &[String]) -> LinderaResult<Vec<u8>>;
    fn simple(&self, row: &[String]) -> LinderaResult<Vec<u8>> {
        let details = vec![
            row[1].to_string(), // POS
            "*".to_string(),    // POS subcategory 1
            "*".to_string(),    // POS subcategory 2
            "*".to_string(),    // POS subcategory 3
            "*".to_string(),    // Conjugation type
            "*".to_string(),    // Conjugation form
            row[0].to_string(), // Base form
            row[2].to_string(), // Reading
            "*".to_string(),    // Pronunciation
        ];
        self.serialize(&details)
    }
    fn deserialize(&self, data: &[u8], string: String) -> LinderaResult<String>;
}

/// Serializer that stores word details as a bincode-encoded list of strings.
pub struct LinderaSerializer;
impl DictionarySerializer for LinderaSerializer {
    /// Returns the fixed identifier `"Lindera"`.
    fn identifier(&self) -> String {
        "Lindera".to_string()
    }

    /// Encodes `row` with bincode as a sequence of strings.
    ///
    /// # Errors
    ///
    /// Returns a `Serialize` error when bincode fails to encode the row.
    fn serialize(&self, row: &[String]) -> LinderaResult<Vec<u8>> {
        // A slice is serialized as the same length-prefixed sequence as a
        // `Vec`, so the row can be encoded directly without copying every
        // string into a temporary vector first.
        bincode::serialize(row)
            .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))
    }

    /// Decodes a bincode-encoded list of strings and joins it with `","`.
    ///
    /// The `_string` argument is not used by this serializer.
    ///
    /// # Errors
    ///
    /// Returns a `Deserialize` error when `data` is not a valid encoded list
    /// of strings.
    fn deserialize(&self, data: &[u8], _string: String) -> LinderaResult<String> {
        bincode::deserialize::<Vec<String>>(data)
            .map(|fields| fields.join(","))
            .map_err(|err| LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err)))
    }
}

/// Serializer that stores each word as a bincode-encoded jpreprocess `WordEntry`.
pub struct JPreprocessSerializer;
impl DictionarySerializer for JPreprocessSerializer {
    /// Returns `"JPreprocess v"` followed by this crate's package version.
    fn identifier(&self) -> String {
        format!("JPreprocess v{}", env!("CARGO_PKG_VERSION"))
    }

    /// Parses `row` into a `WordEntry` and encodes the entry with bincode.
    ///
    /// The row is resized to exactly 13 fields before parsing: missing fields
    /// become empty strings and extra fields are dropped.
    ///
    /// # Errors
    ///
    /// Returns a `Serialize` error when the row cannot be parsed (the row is
    /// also printed to stderr in that case) or when encoding fails.
    fn serialize(&self, row: &[String]) -> LinderaResult<Vec<u8>> {
        use jpreprocess_core::word_entry::WordEntry;

        let mut fields: Vec<&str> = row.iter().map(String::as_str).collect();
        fields.resize(13, "");

        let entry = WordEntry::load(fields.as_slice()).map_err(|err| {
            eprintln!("ERR: jpreprocess parse failed. Word:\n{:?}", row);
            LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err))
        })?;

        bincode::serialize(&entry)
            .map_err(|err| LinderaErrorKind::Serialize.with_error(anyhow::anyhow!(err)))
    }

    /// Decodes a bincode-encoded `WordEntry` and renders it as a comma-joined
    /// string, passing `string` to `WordEntry::to_str_vec`.
    ///
    /// # Errors
    ///
    /// Returns a `Deserialize` error when `data` is not a valid encoded entry.
    fn deserialize(&self, data: &[u8], string: String) -> LinderaResult<String> {
        use jpreprocess_core::word_entry::WordEntry;

        bincode::deserialize::<WordEntry>(data)
            .map(|entry| entry.to_str_vec(string).join(","))
            .map_err(|err| LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err)))
    }
}