1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
use crate::error::Error;
use serde::Serialize;
use std::{
    collections::HashMap,
    fs::File,
    io::{BufRead, BufReader},
    path::PathBuf,
};

// Delimiters and line prefixes used while parsing a CEDICT file.
// Separator that surrounds each translation of an entry.
const CEDICT_SLASH: &str = "/";
// Brackets that enclose the pinyin section of an entry.
const CEDICT_BRACKET: [char; 2] = ['[', ']'];
// Leading characters of lines that are skipped rather than parsed. Despite the
// name, a line starting with one of these is *not* treated as an entry.
const VALID_LINE_FILTER: [char; 2] = ['#', '%'];

/// Selects which headword of an entry is used as the key of the dictionary.
///
/// The enum is a plain selector without payload, so it is cheap to copy and
/// can be compared for equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KeyVariant {
    /// Index the entries by their simplified headword.
    Simplified,
    /// Index the entries by their traditional headword.
    Traditional,
}

/// In-memory CEDICT dictionary.
///
/// Built by `Dictionary::new`, which stores every parsed entry under either its
/// simplified or its traditional headword depending on the requested
/// `KeyVariant`.
#[derive(Debug)]
pub struct Dictionary {
    /// Parsed entries indexed by headword.
    pub items: HashMap<String, Item>,
}

/// A single entry parsed from one line of a CEDICT file.
///
/// A line follows the layout
/// `<traditional_chinese> <simplified_chinese> [<pinyin>] /<translations>/`.
#[derive(Debug, Default, Clone, Serialize)]
pub struct Item {
    /// Headword written with traditional characters.
    pub traditional_character: String,
    /// Headword written with simplified characters.
    pub simplified_character: String,
    /// Pinyin syllables from the bracketed section, split on whitespace
    /// (e.g. `yi1`, `dong4`).
    pub pinyin_tone_number: Vec<String>,
    /// Non-empty, slash-delimited translations of the headword.
    pub translations: Vec<String>,
}

impl Dictionary {
    /// Load a CEDICT file and build a dictionary from its entries.
    ///
    /// Lines that are blank or that start with one of the characters in
    /// `VALID_LINE_FILTER` are skipped; every other line is parsed as an
    /// [`Item`]. When several entries share the same key, the entry appearing
    /// last in the file replaces the previous ones.
    ///
    /// # Arguments
    ///
    /// * `path` - Location of the CEDICT file to read
    /// * `key_variant` - Which headword (simplified or traditional) is used as
    ///   the key of `items`
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be opened, when a line cannot be
    /// read, or when an entry line cannot be parsed.
    pub fn new(path: PathBuf, key_variant: KeyVariant) -> Result<Dictionary, Error> {
        let reader = BufReader::new(File::open(path)?);
        let mut items = HashMap::new();

        for line in reader.lines() {
            let line = line?;

            // Blank lines carry no entry; parsing them would abort the whole
            // load with a parse error, so they are skipped like comment lines.
            if line.trim().is_empty() || line.starts_with(VALID_LINE_FILTER) {
                continue;
            }

            // A cedict line is composed using the format below
            // <traditional_chinese> <simplified_chinese> [<pinyin>] /<translations>/
            let item = Item::try_from(line.as_str())?;
            let key = match key_variant {
                KeyVariant::Simplified => item.simplified_character.clone(),
                KeyVariant::Traditional => item.traditional_character.clone(),
            };
            items.insert(key, item);
        }

        Ok(Dictionary { items })
    }
}

impl TryFrom<&str> for Item {
    type Error = Error;

    fn try_from(line: &str) -> Result<Self, Self::Error> {
        let translations_split_parts = line.split(CEDICT_SLASH).collect::<Vec<&str>>();

        let rest = translations_split_parts
            .first()
            .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;

        let translations = translations_split_parts
            .get(1..)
            .ok_or_else(|| Error::Parse("Unable to found the translations".to_string()))?
            .iter()
            .filter_map(filter_empty_check)
            .collect::<Vec<_>>();

        let pinyin_split_parts = rest.split(CEDICT_BRACKET).collect::<Vec<_>>();

        let rest = pinyin_split_parts
            .first()
            .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;

        let pinyin = pinyin_split_parts
            .get(1)
            .ok_or_else(|| Error::Parse("Unable to found pinyin".to_string()))?
            .split_whitespace()
            .filter_map(filter_empty_check)
            .collect::<Vec<String>>();

        // Splitting the whitespace allow of the rest allow us to get the traditional & simplified chinese character
        let rest = rest.split_whitespace().collect::<Vec<_>>();

        let traditional_character = rest
            .first()
            .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
            .to_string();

        let simplified_character = rest
            .last()
            .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
            .to_string();

        Ok(Item {
            traditional_character,
            simplified_character,
            pinyin_tone_number: pinyin,
            translations,
        })
    }
}

/// Turn a string-like value into an owned `String`, dropping empty values.
///
/// Intended to be used with `Iterator::filter_map`.
///
/// # Arguments
///
/// * `s` - Any value that can be viewed as a `str`
///
/// # Returns
///
/// `None` when `s` is the empty string, otherwise `Some` holding an owned copy
/// of its content.
fn filter_empty_check<S>(s: S) -> Option<String>
where
    S: AsRef<str>,
{
    match s.as_ref() {
        "" => None,
        text => Some(text.to_string()),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed entry line populates every field of the item.
    #[test]
    fn expect_to_parse_line_item() {
        let parsed = Item::try_from(r"一動不動 一动不动 [yi1 dong4 bu4 dong4] /motionless/");
        assert!(parsed.is_ok());

        let Item {
            traditional_character,
            simplified_character,
            pinyin_tone_number,
            translations,
        } = parsed.unwrap();

        assert_eq!(traditional_character, "一動不動");
        assert_eq!(simplified_character, "一动不动");
        assert_eq!(pinyin_tone_number, vec!["yi1", "dong4", "bu4", "dong4"]);
        assert_eq!(translations, vec!["motionless"]);
    }
}