1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
/// A single dictionary entry for one headword, tied to the [`DictSource`] it came from.
///
/// `DictEntry::new` fills in `id`, `word`, `word_lower` and `source`; every other
/// field starts out empty (`None`, an empty `Vec`, `0`, `false` or `Value::Null`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DictEntry {
    /// Entry identifier; `DictEntry::new` builds it as `"<source slug>:<word_lower>"`.
    pub id: String,
    /// Headword exactly as passed to `DictEntry::new`.
    pub word: String,
    /// Lowercased headword (`word.to_lowercase()` in `DictEntry::new`).
    pub word_lower: String,
    /// Phonetic transcription, presumably the British one; `phonetic()` prefers it.
    pub phonetic_uk: Option<String>,
    /// Phonetic transcription, presumably the American one; `phonetic()` falls back to it.
    pub phonetic_us: Option<String>,
    /// Senses of the word. `primary_translation()` / `primary_definition()` return the
    /// first non-blank `zh` / `en` text found here.
    pub definitions: Vec<Definition>,
    /// Part-of-speech labels for the entry as a whole (not included in `all_text()`).
    pub pos: Vec<String>,
    /// Presumably the Collins star rating; `0` by default — confirm range with the importer.
    pub collins_star: u8,
    /// Presumably whether the word is on the Oxford 3000 list; `false` by default.
    pub oxford_3000: bool,
    /// Tags attached to the entry (presumably exam/level tags — TODO confirm with the importer).
    pub tags: Vec<String>,
    /// Presumably frequency data from the BNC corpus (rank vs. count is not visible here).
    pub freq_bnc: Option<u32>,
    /// Presumably frequency data from the COCA corpus (rank vs. count is not visible here).
    pub freq_coca: Option<u32>,
    /// Word forms related to the headword, each with a `kind` label (see [`Exchange`]).
    pub exchanges: Vec<Exchange>,
    /// Example texts as `en` / `zh` pairs; both sides are included in `all_text()`.
    pub examples: Vec<Example>,
    /// Synonym groups; their `zh_meaning` and `words` are included in `all_text()`.
    pub synonyms: Vec<Synonym>,
    /// Phrases as `en` / `zh` pairs; both sides are included in `all_text()`.
    pub phrases: Vec<Phrase>,
    /// Related words grouped by part of speech; included in `all_text()`.
    pub related_words: Vec<RelatedWord>,
    /// Optional mnemonic text; included in `all_text()` when present.
    pub mnemonic: Option<String>,
    /// Where this entry came from; its slug is the prefix of `id`.
    pub source: DictSource,
    /// Arbitrary JSON payload, `Value::Null` by default (presumably source-specific data).
    pub extra: Value,
}
27
28impl DictEntry {
29 pub fn new(source: DictSource, word: impl Into<String>) -> Self {
30 let word = word.into();
31 let word_lower = word.to_lowercase();
32 let id = format!("{}:{}", source.slug(), word_lower);
33
34 Self {
35 id,
36 word,
37 word_lower,
38 phonetic_uk: None,
39 phonetic_us: None,
40 definitions: Vec::new(),
41 pos: Vec::new(),
42 collins_star: 0,
43 oxford_3000: false,
44 tags: Vec::new(),
45 freq_bnc: None,
46 freq_coca: None,
47 exchanges: Vec::new(),
48 examples: Vec::new(),
49 synonyms: Vec::new(),
50 phrases: Vec::new(),
51 related_words: Vec::new(),
52 mnemonic: None,
53 source,
54 extra: Value::Null,
55 }
56 }
57
58 pub fn all_text(&self) -> String {
59 let mut parts = Vec::new();
60 parts.push(self.word.clone());
61 parts.push(self.word_lower.clone());
62
63 if let Some(value) = &self.phonetic_uk {
64 parts.push(value.clone());
65 }
66 if let Some(value) = &self.phonetic_us {
67 parts.push(value.clone());
68 }
69
70 for definition in &self.definitions {
71 parts.push(definition.pos.clone().unwrap_or_default());
72 parts.push(definition.zh.clone());
73 parts.push(definition.en.clone());
74 }
75 for example in &self.examples {
76 parts.push(example.en.clone());
77 parts.push(example.zh.clone());
78 }
79 for phrase in &self.phrases {
80 parts.push(phrase.en.clone());
81 parts.push(phrase.zh.clone());
82 }
83 for synonym in &self.synonyms {
84 parts.push(synonym.zh_meaning.clone());
85 parts.extend(synonym.words.clone());
86 }
87 for related in &self.related_words {
88 parts.push(related.pos.clone());
89 for word in &related.words {
90 parts.push(word.word.clone());
91 parts.push(word.translation.clone());
92 }
93 }
94 if let Some(value) = &self.mnemonic {
95 parts.push(value.clone());
96 }
97
98 parts.join(" ")
99 }
100
101 pub fn primary_translation(&self) -> Option<&str> {
102 self.definitions
103 .iter()
104 .map(|definition| definition.zh.trim())
105 .find(|value| !value.is_empty())
106 }
107
108 pub fn primary_definition(&self) -> Option<&str> {
109 self.definitions
110 .iter()
111 .map(|definition| definition.en.trim())
112 .find(|value| !value.is_empty())
113 }
114
115 pub fn phonetic(&self) -> Option<&str> {
116 self.phonetic_uk
117 .as_deref()
118 .filter(|value| !value.trim().is_empty())
119 .or_else(|| {
120 self.phonetic_us
121 .as_deref()
122 .filter(|value| !value.trim().is_empty())
123 })
124 }
125}
126
/// One sense of a word: definition text, translation text and an optional part of speech.
///
/// `Definition::new` whitespace-normalizes `en` / `zh` with `clean_text` and
/// normalizes `pos` with `clean_pos`, dropping it when the result is empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Definition {
    /// Definition text (presumably English); source of `DictEntry::primary_definition`.
    pub en: String,
    /// Translation text (presumably Chinese); source of `DictEntry::primary_translation`.
    pub zh: String,
    /// Part-of-speech label for this sense, if any.
    pub pos: Option<String>,
}
133
134impl Definition {
135 pub fn new(en: impl Into<String>, zh: impl Into<String>, pos: Option<String>) -> Self {
136 Self {
137 en: clean_text(en.into()),
138 zh: clean_text(zh.into()),
139 pos: pos.map(clean_pos).filter(|value| !value.is_empty()),
140 }
141 }
142}
143
/// An example text stored as a pair of strings; both are included in `DictEntry::all_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Example {
    /// Example text (presumably the English side).
    pub en: String,
    /// Example text (presumably the Chinese translation).
    pub zh: String,
}
149
/// A phrase stored as a pair of strings; both are included in `DictEntry::all_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Phrase {
    /// Phrase text (presumably the English side).
    pub en: String,
    /// Phrase text (presumably the Chinese translation).
    pub zh: String,
}
155
/// A group of synonyms sharing one meaning.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Synonym {
    /// Optional part-of-speech label for the group (not included in `DictEntry::all_text`).
    pub pos: Option<String>,
    /// The shared meaning (presumably in Chinese); included in `DictEntry::all_text`.
    pub zh_meaning: String,
    /// The synonym words; each is included in `DictEntry::all_text`.
    pub words: Vec<String>,
}
162
/// A group of related words listed under one part-of-speech label.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWord {
    /// Part-of-speech label for the group; included in `DictEntry::all_text`.
    pub pos: String,
    /// The related words in this group, each with its translation.
    pub words: Vec<RelatedWordItem>,
}
168
/// One related word together with its translation; both are included in `DictEntry::all_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWordItem {
    /// The related word itself.
    pub word: String,
    /// Translation of `word` (target language not visible here; presumably Chinese).
    pub translation: String,
}
174
/// A word form associated with a headword, labelled by `kind`.
///
/// NOTE(review): presumably an inflection such as a plural or past tense; the set
/// of `kind` values is not visible in this file — confirm with the importer.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Exchange {
    /// Label describing what sort of form `word` is.
    pub kind: String,
    /// The word form itself.
    pub word: String,
}
180
/// Identifies where a dictionary entry came from.
///
/// Serialized by serde as an internally tagged enum: a `type` field carries the
/// snake_case variant name (`ecdict`, `anki`, `sqlite`, `mdx`, `custom`) next to
/// the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum DictSource {
    /// The ECDICT source; slug `ecdict`, display name `ECDICT`.
    Ecdict,
    /// An Anki deck, identified by its deck name.
    Anki { deck_name: String },
    /// A table inside a named SQLite source.
    Sqlite { name: String, table: String },
    /// An MDX dictionary, identified by its file name.
    Mdx { filename: String },
    /// Any other source, identified by a caller-chosen name (its slug has no kind prefix).
    Custom { name: String },
}
190
191impl DictSource {
192 pub fn slug(&self) -> String {
193 match self {
194 Self::Ecdict => "ecdict".to_string(),
195 Self::Anki { deck_name } => format!("anki-{}", slugify(deck_name)),
196 Self::Sqlite { name, table } => format!("sqlite-{}-{}", slugify(name), slugify(table)),
197 Self::Mdx { filename } => format!("mdx-{}", slugify(filename)),
198 Self::Custom { name } => slugify(name),
199 }
200 }
201
202 pub fn display_name(&self) -> String {
203 match self {
204 Self::Ecdict => "ECDICT".to_string(),
205 Self::Anki { deck_name } => deck_name.clone(),
206 Self::Sqlite { name, table } => format!("{name}:{table}"),
207 Self::Mdx { filename } => filename.clone(),
208 Self::Custom { name } => name.clone(),
209 }
210 }
211}
212
/// Normalizes a part-of-speech label to a short lowercase form.
///
/// Surrounding whitespace and trailing dots are removed (including whitespace left
/// in front of the dots, so `"n ."` becomes `"n"`), and the rest is ASCII-lowercased.
/// Known aliases are then mapped to a canonical label:
///
/// | input                              | output |
/// |------------------------------------|--------|
/// | `noun`, `n`                        | `n`    |
/// | `verb`, `v`                        | `v`    |
/// | `adjective`, `adj`, `a`            | `adj`  |
/// | `adverb`, `adv`                    | `adv`  |
/// | `preposition`, `prep`              | `prep` |
/// | `conjunction`, `conj`              | `conj` |
/// | `pronoun`, `pron`                  | `pron` |
/// | `interjection`, `interj`, `int`    | `int`  |
/// | `article`, `art`                   | `art`  |
///
/// Anything else (for example `vi`, `vt`) is returned lowercased and otherwise
/// unchanged. The result may be empty, e.g. for `"."` or blank input.
pub fn clean_pos(value: impl AsRef<str>) -> String {
    // `trim_end` after stripping the dots removes whitespace that sat before them.
    let stripped = value.as_ref().trim().trim_end_matches('.').trim_end();
    let lower = stripped.to_ascii_lowercase();

    let canonical = match lower.as_str() {
        "noun" | "n" => "n",
        "verb" | "v" => "v",
        "adjective" | "adj" | "a" => "adj",
        "adverb" | "adv" => "adv",
        "preposition" | "prep" => "prep",
        "conjunction" | "conj" => "conj",
        "pronoun" | "pron" => "pron",
        "interjection" | "interj" | "int" => "int",
        "article" | "art" => "art",
        // Unknown labels pass through in lowercase without another allocation.
        _ => return lower,
    };
    canonical.to_string()
}
231
/// Normalizes a tag to its canonical spelling.
///
/// The input is trimmed and ASCII-lowercased, then known aliases are folded:
/// `zk` / `zhongkao` / `中考` → `zk`, `gk` / `gaokao` / `高考` → `gk`,
/// `ky` / `kaoyan` / `考研` → `kao_yan`, `cet-4` / `cet_4` → `cet4`,
/// `cet-6` / `cet_6` → `cet6`. Any other tag is returned in its trimmed,
/// lowercased form.
pub fn normalize_tag(value: impl AsRef<str>) -> String {
    let lowered = value.as_ref().trim().to_ascii_lowercase();

    // NOTE(review): `kao_yan` is the only canonical form that is longer than its
    // short alias (`ky`); kept exactly as-is — confirm this is intentional.
    let canonical = match lowered.as_str() {
        "zk" | "zhongkao" | "中考" => "zk",
        "gk" | "gaokao" | "高考" => "gk",
        "ky" | "kaoyan" | "考研" => "kao_yan",
        "cet-4" | "cet_4" => "cet4",
        "cet-6" | "cet_6" => "cet6",
        _ => return lowered,
    };
    String::from(canonical)
}
244
/// Collapses every run of whitespace in `value` into a single space.
///
/// Leading and trailing whitespace is removed. Blank input yields an empty string.
pub fn clean_text(value: impl AsRef<str>) -> String {
    let raw = value.as_ref();
    let mut cleaned = String::with_capacity(raw.len());

    for token in raw.split_whitespace() {
        // Separator goes between tokens only, so no edge trimming is needed afterwards.
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(token);
    }
    cleaned
}
254
/// Turns an arbitrary name into a lowercase, dash-separated slug.
///
/// * Letters and digits are kept and lowercased. The check is Unicode-aware, so
///   CJK or accented names keep their characters instead of collapsing to the
///   fallback (which made differently named sources share one slug).
/// * Whitespace and `_`, `-`, `.`, `/` act as separators: each run becomes a
///   single `-`, and separators at either end are dropped.
/// * Every other character is removed without leaving a separator.
/// * If nothing is left, the fallback `"source"` is returned.
///
/// Output for pure-ASCII input is identical to the previous ASCII-only behavior.
/// Names containing non-ASCII letters or digits now produce different slugs, so
/// ids derived from them change.
fn slugify(value: &str) -> String {
    let mut out = String::with_capacity(value.len());

    for ch in value.chars() {
        let is_separator = ch.is_whitespace() || matches!(ch, '_' | '-' | '.' | '/');
        if ch.is_alphanumeric() {
            // `to_lowercase` may yield more than one char for some code points.
            out.extend(ch.to_lowercase());
        } else if is_separator && !out.is_empty() && !out.ends_with('-') {
            out.push('-');
        }
    }

    // Runs are collapsed above, so at most one trailing dash can remain.
    if out.ends_with('-') {
        out.pop();
    }

    if out.is_empty() {
        "source".to_string()
    } else {
        out
    }
}