1use crate::error::Error;
2use crate::variant::KeyVariant;
3use serde::Serialize;
4use std::{
5 collections::HashMap,
6 fs::File,
7 io::{BufRead, BufReader},
8 path::PathBuf,
9};
10
/// Separator between the headword/pinyin section of an entry line and each of
/// its translations.
const CEDICT_SLASH: &str = "/";
/// Characters that delimit the pinyin section of an entry line.
const CEDICT_BRACKET: [char; 2] = ['[', ']'];
/// Leading characters of lines that are skipped when loading a dictionary.
/// Despite the name, a line starting with one of these is NOT parsed.
const VALID_LINE_FILTER: [char; 2] = ['#', '%'];
15
/// In-memory dictionary built from an entry file by [`Dictionary::new`].
#[derive(Debug)]
pub struct Dictionary {
    /// Parsed entries, keyed by either the simplified or the traditional
    /// character form depending on the `KeyVariant` given at construction.
    pub items: HashMap<String, Item>,
}
20
/// A single dictionary entry parsed from one line of the entry file.
#[derive(Debug, Default, Clone, Serialize)]
pub struct Item {
    /// First whitespace-separated token found before the bracketed section.
    pub traditional_character: String,
    /// Last whitespace-separated token found before the bracketed section.
    pub simplified_character: String,
    /// Whitespace-separated tokens found between the brackets
    /// (e.g. `yi1`, `dong4`).
    pub pinyin_tone_number: Vec<String>,
    /// Non-empty slash-separated parts that follow the first slash of the line.
    pub translations: Vec<String>,
}
28
29impl Dictionary {
30 pub fn new(path: &PathBuf, key_variant: KeyVariant) -> Result<Dictionary, Error> {
37 let file = File::open(path)?;
38 let lines = BufReader::new(file).lines();
39 let mut items = HashMap::new();
40
41 for line in lines {
42 let line = line?;
43
44 if line.starts_with(VALID_LINE_FILTER) {
45 continue;
46 }
47
48 let item = Item::try_from(line)?;
51 match key_variant {
52 KeyVariant::Simplified => items.insert(item.simplified_character.clone(), item),
53 KeyVariant::Traditional => items.insert(item.traditional_character.clone(), item),
54 };
55 }
56
57 Ok(Dictionary { items })
58 }
59}
60
61impl TryFrom<String> for Item {
62 type Error = Error;
63
64 fn try_from(line: String) -> Result<Self, Self::Error> {
65 let translations_split_parts = line.split(CEDICT_SLASH).collect::<Vec<&str>>();
66
67 let rest = translations_split_parts
68 .first()
69 .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;
70
71 let translations = translations_split_parts
72 .get(1..)
73 .ok_or_else(|| Error::Parse("Unable to found the translations".to_string()))?
74 .iter()
75 .filter_map(filter_empty_check)
76 .collect::<Vec<_>>();
77
78 let pinyin_split_parts = rest.split(CEDICT_BRACKET).collect::<Vec<_>>();
79
80 let rest = pinyin_split_parts
81 .first()
82 .ok_or_else(|| Error::Parse("Unable to found the rest".to_string()))?;
83
84 let pinyin = pinyin_split_parts
85 .get(1)
86 .ok_or_else(|| Error::Parse("Unable to found pinyin".to_string()))?
87 .split_whitespace()
88 .filter_map(filter_empty_check)
89 .collect::<Vec<String>>();
90
91 let rest = rest.split_whitespace().collect::<Vec<_>>();
93
94 let traditional_character = rest
95 .first()
96 .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
97 .to_string();
98
99 let simplified_character = rest
100 .last()
101 .ok_or_else(|| Error::Parse("Unable to found the tradtional character".to_string()))?
102 .to_string();
103
104 Ok(Item {
105 traditional_character,
106 simplified_character,
107 pinyin_tone_number: pinyin,
108 translations,
109 })
110 }
111}
112
113impl Item {
114 pub(crate) fn get_character_for_key_variant(&self, variant: &KeyVariant) -> String {
121 match variant {
122 KeyVariant::Simplified => self.simplified_character.clone(),
123 KeyVariant::Traditional => self.traditional_character.clone(),
124 }
125 }
126}
127
/// Converts `s` into an owned `String`, or yields `None` when `s` is empty.
///
/// Intended for use with `filter_map` to drop empty pieces produced by
/// splitting. Only a zero-length string counts as empty; whitespace is kept.
fn filter_empty_check<S>(s: S) -> Option<String>
where
    S: AsRef<str>,
{
    let text = s.as_ref();

    (!text.is_empty()).then(|| text.to_owned())
}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed entry line populates every field of the parsed item.
    #[test]
    fn expect_to_parse_line_item() {
        let raw = String::from(r"一動不動 一动不动 [yi1 dong4 bu4 dong4] /motionless/");

        let parsed = Item::try_from(raw);
        assert!(parsed.is_ok());

        let Item {
            traditional_character,
            simplified_character,
            pinyin_tone_number,
            translations,
        } = parsed.unwrap();

        assert_eq!(traditional_character, "一動不動");
        assert_eq!(simplified_character, "一动不动");
        assert_eq!(pinyin_tone_number, vec!["yi1", "dong4", "bu4", "dong4"]);
        assert_eq!(translations, vec!["motionless"]);
    }
}