dodo_zh/
cedict.rs

1use crate::error::Error;
2use crate::variant::KeyVariant;
3use serde::Serialize;
4use std::{
5    collections::HashMap,
6    fs::File,
7    io::{BufRead, BufReader},
8    path::PathBuf,
9};
10
// Constant

/// Separator placed around every translation in a CEDICT line
/// (e.g. `/motionless/`); the parser splits each line on it.
const CEDICT_SLASH: &str = "/";
/// Characters enclosing the pinyin section of a CEDICT line
/// (e.g. `[yi1 dong4]`); the parser splits the line header on either of them.
const CEDICT_BRACKET: [char; 2] = ['[', ']'];
/// Leading characters of the lines that are skipped when loading a dictionary.
/// Despite the name, a line starting with one of these is *not* parsed.
const VALID_LINE_FILTER: [char; 2] = ['#', '%'];
15
/// In-memory dictionary built from a CEDICT-formatted file.
#[derive(Debug)]
pub struct Dictionary {
    /// Parsed entries, keyed by either the simplified or the traditional
    /// character depending on the `KeyVariant` given to [`Dictionary::new`].
    pub items: HashMap<String, Item>,
}
20
/// A single dictionary entry parsed from one CEDICT line.
#[derive(Debug, Default, Clone, Serialize)]
pub struct Item {
    /// First whitespace-separated token of the line (traditional form).
    pub traditional_character: String,
    /// Token preceding the bracketed pinyin section (simplified form).
    pub simplified_character: String,
    /// Whitespace-separated syllables found between the brackets,
    /// e.g. `["yi1", "dong4", "bu4", "dong4"]`.
    pub pinyin_tone_number: Vec<String>,
    /// Non-empty slash-separated segments following the pinyin section,
    /// e.g. `["motionless"]`.
    pub translations: Vec<String>,
}
28
29impl Dictionary {
30    /// Create a new cedict dictionnary
31    ///
32    /// # Arguments
33    ///
34    /// * `path` - PathBuf
35    /// * `key_variant` - KeyVariant
36    pub fn new(path: &PathBuf, key_variant: KeyVariant) -> Result<Dictionary, Error> {
37        let file = File::open(path)?;
38        let lines = BufReader::new(file).lines();
39        let mut items = HashMap::new();
40
41        for line in lines {
42            let line = line?;
43
44            if line.starts_with(VALID_LINE_FILTER) {
45                continue;
46            }
47
48            // A cedict line is composed using the format below
49            // <traditional_chinese> <simplified_chinese> <pinyin> <translations>
50            let item = Item::try_from(line)?;
51            match key_variant {
52                KeyVariant::Simplified => items.insert(item.simplified_character.clone(), item),
53                KeyVariant::Traditional => items.insert(item.traditional_character.clone(), item),
54            };
55        }
56
57        Ok(Dictionary { items })
58    }
59}
60
61impl TryFrom<String> for Item {
62    type Error = Error;
63
64    fn try_from(line: String) -> Result<Self, Self::Error> {
65        let translations_split_parts = line.split(CEDICT_SLASH).collect::<Vec<&str>>();
66
67        let rest = translations_split_parts
68            .first()
69            .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;
70
71        let translations = translations_split_parts
72            .get(1..)
73            .ok_or_else(|| Error::Parse("Unable to found the translations".to_string()))?
74            .iter()
75            .filter_map(filter_empty_check)
76            .collect::<Vec<_>>();
77
78        let pinyin_split_parts = rest.split(CEDICT_BRACKET).collect::<Vec<_>>();
79
80        let rest = pinyin_split_parts
81            .first()
82            .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;
83
84        let pinyin = pinyin_split_parts
85            .get(1)
86            .ok_or_else(|| Error::Parse("Unable to found pinyin".to_string()))?
87            .split_whitespace()
88            .filter_map(filter_empty_check)
89            .collect::<Vec<String>>();
90
91        // Splitting the whitespace allow of the rest allow us to get the traditional & simplified chinese character
92        let rest = rest.split_whitespace().collect::<Vec<_>>();
93
94        let traditional_character = rest
95            .first()
96            .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
97            .to_string();
98
99        let simplified_character = rest
100            .last()
101            .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
102            .to_string();
103
104        Ok(Item {
105            traditional_character,
106            simplified_character,
107            pinyin_tone_number: pinyin,
108            translations,
109        })
110    }
111}
112
113impl Item {
114    /// Get the character for the given key variant
115    ///
116    /// # Arguments
117    ///
118    /// * `self` - Item
119    /// * `variant` - KeyVariant
120    pub(crate) fn get_character_for_key_variant(&self, variant: &KeyVariant) -> String {
121        match variant {
122            KeyVariant::Simplified => self.simplified_character.clone(),
123            KeyVariant::Traditional => self.traditional_character.clone(),
124        }
125    }
126}
127
/// Convert a string-like value into an owned `String`, discarding empty input.
///
/// Returns `None` when `s` is the empty string and `Some` holding a copy of
/// its content otherwise. Whitespace-only input is not considered empty.
///
/// # Arguments
///
/// * `s` - S
fn filter_empty_check<S>(s: S) -> Option<String>
where
    S: AsRef<str>,
{
    let text = s.as_ref();

    (!text.is_empty()).then(|| text.to_string())
}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed CEDICT line is split into its four components.
    #[test]
    fn expect_to_parse_line_item() {
        let raw = String::from(r"一動不動 一动不动 [yi1 dong4 bu4 dong4] /motionless/");
        let parsed = Item::try_from(raw);

        assert!(parsed.is_ok());

        let Item {
            traditional_character,
            simplified_character,
            pinyin_tone_number,
            translations,
        } = parsed.unwrap();

        assert_eq!(traditional_character, "一動不動");
        assert_eq!(simplified_character, "一动不动");
        assert_eq!(pinyin_tone_number, vec!["yi1", "dong4", "bu4", "dong4"]);
        assert_eq!(translations, vec!["motionless"]);
    }
}