// dictx_core/entry.rs

1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
4#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
5pub struct DictEntry {
6    pub id: String,
7    pub word: String,
8    pub word_lower: String,
9    pub phonetic_uk: Option<String>,
10    pub phonetic_us: Option<String>,
11    pub definitions: Vec<Definition>,
12    pub pos: Vec<String>,
13    pub collins_star: u8,
14    pub oxford_3000: bool,
15    pub tags: Vec<String>,
16    pub freq_bnc: Option<u32>,
17    pub freq_coca: Option<u32>,
18    pub exchanges: Vec<Exchange>,
19    pub examples: Vec<Example>,
20    pub synonyms: Vec<Synonym>,
21    pub phrases: Vec<Phrase>,
22    pub related_words: Vec<RelatedWord>,
23    pub mnemonic: Option<String>,
24    pub source: DictSource,
25    pub extra: Value,
26}
27
28impl DictEntry {
29    pub fn new(source: DictSource, word: impl Into<String>) -> Self {
30        let word = word.into();
31        let word_lower = word.to_lowercase();
32        let id = format!("{}:{}", source.slug(), word_lower);
33
34        Self {
35            id,
36            word,
37            word_lower,
38            phonetic_uk: None,
39            phonetic_us: None,
40            definitions: Vec::new(),
41            pos: Vec::new(),
42            collins_star: 0,
43            oxford_3000: false,
44            tags: Vec::new(),
45            freq_bnc: None,
46            freq_coca: None,
47            exchanges: Vec::new(),
48            examples: Vec::new(),
49            synonyms: Vec::new(),
50            phrases: Vec::new(),
51            related_words: Vec::new(),
52            mnemonic: None,
53            source,
54            extra: Value::Null,
55        }
56    }
57
58    pub fn all_text(&self) -> String {
59        let mut parts = Vec::new();
60        parts.push(self.word.clone());
61        parts.push(self.word_lower.clone());
62
63        if let Some(value) = &self.phonetic_uk {
64            parts.push(value.clone());
65        }
66        if let Some(value) = &self.phonetic_us {
67            parts.push(value.clone());
68        }
69
70        for definition in &self.definitions {
71            parts.push(definition.pos.clone().unwrap_or_default());
72            parts.push(definition.zh.clone());
73            parts.push(definition.en.clone());
74        }
75        for example in &self.examples {
76            parts.push(example.en.clone());
77            parts.push(example.zh.clone());
78        }
79        for phrase in &self.phrases {
80            parts.push(phrase.en.clone());
81            parts.push(phrase.zh.clone());
82        }
83        for synonym in &self.synonyms {
84            parts.push(synonym.zh_meaning.clone());
85            parts.extend(synonym.words.clone());
86        }
87        for related in &self.related_words {
88            parts.push(related.pos.clone());
89            for word in &related.words {
90                parts.push(word.word.clone());
91                parts.push(word.translation.clone());
92            }
93        }
94        if let Some(value) = &self.mnemonic {
95            parts.push(value.clone());
96        }
97
98        parts.join(" ")
99    }
100
101    pub fn primary_translation(&self) -> Option<&str> {
102        self.definitions
103            .iter()
104            .map(|definition| definition.zh.trim())
105            .find(|value| !value.is_empty())
106    }
107
108    pub fn primary_definition(&self) -> Option<&str> {
109        self.definitions
110            .iter()
111            .map(|definition| definition.en.trim())
112            .find(|value| !value.is_empty())
113    }
114
115    pub fn phonetic(&self) -> Option<&str> {
116        self.phonetic_uk
117            .as_deref()
118            .filter(|value| !value.trim().is_empty())
119            .or_else(|| {
120                self.phonetic_us
121                    .as_deref()
122                    .filter(|value| !value.trim().is_empty())
123            })
124    }
125}
126
127#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
128pub struct Definition {
129    pub en: String,
130    pub zh: String,
131    pub pos: Option<String>,
132}
133
134impl Definition {
135    pub fn new(en: impl Into<String>, zh: impl Into<String>, pos: Option<String>) -> Self {
136        Self {
137            en: clean_text(en.into()),
138            zh: clean_text(zh.into()),
139            pos: pos.map(clean_pos).filter(|value| !value.is_empty()),
140        }
141    }
142}
143
144#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
145pub struct Example {
146    pub en: String,
147    pub zh: String,
148}
149
150#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
151pub struct Phrase {
152    pub en: String,
153    pub zh: String,
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
157pub struct Synonym {
158    pub pos: Option<String>,
159    pub zh_meaning: String,
160    pub words: Vec<String>,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
164pub struct RelatedWord {
165    pub pos: String,
166    pub words: Vec<RelatedWordItem>,
167}
168
169#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
170pub struct RelatedWordItem {
171    pub word: String,
172    pub translation: String,
173}
174
175#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
176pub struct Exchange {
177    pub kind: String,
178    pub word: String,
179}
180
181#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
182#[serde(tag = "type", rename_all = "snake_case")]
183pub enum DictSource {
184    Ecdict,
185    Anki { deck_name: String },
186    Sqlite { name: String, table: String },
187    Mdx { filename: String },
188    Custom { name: String },
189}
190
191impl DictSource {
192    pub fn slug(&self) -> String {
193        match self {
194            Self::Ecdict => "ecdict".to_string(),
195            Self::Anki { deck_name } => format!("anki-{}", slugify(deck_name)),
196            Self::Sqlite { name, table } => format!("sqlite-{}-{}", slugify(name), slugify(table)),
197            Self::Mdx { filename } => format!("mdx-{}", slugify(filename)),
198            Self::Custom { name } => slugify(name),
199        }
200    }
201
202    pub fn display_name(&self) -> String {
203        match self {
204            Self::Ecdict => "ECDICT".to_string(),
205            Self::Anki { deck_name } => deck_name.clone(),
206            Self::Sqlite { name, table } => format!("{name}:{table}"),
207            Self::Mdx { filename } => filename.clone(),
208            Self::Custom { name } => name.clone(),
209        }
210    }
211}
212
/// Normalizes a part-of-speech label to a short lowercase code.
///
/// Leading whitespace and any trailing mix of whitespace and `.` are removed, the
/// label is ASCII-lowercased, and known spellings are folded onto one abbreviation
/// (`noun` → `n`, `adjective`/`a` → `adj`, `interjection`/`interj` → `int`, …).
/// Labels that are not recognised (including `vi` and `vt`) are returned in their
/// lowercased form, so an input made only of whitespace and dots yields `""`.
pub fn clean_pos(value: impl AsRef<str>) -> String {
    let label = value
        .as_ref()
        .trim_start()
        // Strip dots and whitespace together so `"adv ."` is handled like `"adv."`.
        .trim_end_matches(|ch: char| ch == '.' || ch.is_whitespace())
        .to_ascii_lowercase();

    let canonical = match label.as_str() {
        "noun" | "n" => "n",
        "verb" | "v" => "v",
        "adjective" | "adj" | "a" => "adj",
        "adverb" | "adv" => "adv",
        "preposition" | "prep" => "prep",
        "conjunction" | "conj" => "conj",
        "pronoun" | "pron" => "pron",
        "interjection" | "interj" | "int" => "int",
        "article" | "art" => "art",
        // Everything else passes through unchanged (already lowercased).
        _ => return label,
    };
    canonical.to_string()
}
231
/// Maps a tag onto its canonical spelling.
///
/// The input is trimmed and ASCII-lowercased first. Known aliases are folded
/// (`zhongkao`/`中考` → `zk`, `gaokao`/`高考` → `gk`, `ky`/`kaoyan`/`考研` → `kao_yan`,
/// `cet-4`/`cet_4` → `cet4`, `cet-6`/`cet_6` → `cet6`); any other tag is returned in
/// its trimmed, lowercased form.
pub fn normalize_tag(value: impl AsRef<str>) -> String {
    let lowered = value.as_ref().trim().to_ascii_lowercase();

    let canonical = match lowered.as_str() {
        "zk" | "zhongkao" | "中考" => "zk",
        "gk" | "gaokao" | "高考" => "gk",
        "ky" | "kaoyan" | "考研" => "kao_yan",
        "cet-4" | "cet_4" => "cet4",
        "cet-6" | "cet_6" => "cet6",
        // Unknown tags are kept as they are after trimming and lowercasing.
        _ => return lowered,
    };
    String::from(canonical)
}
244
/// Collapses every run of whitespace in `value` into a single space and removes
/// leading and trailing whitespace.
///
/// Whitespace is whatever `str::split_whitespace` treats as such, so tabs, newlines
/// and Unicode spaces are all replaced. Blank input yields an empty string.
pub fn clean_text(value: impl AsRef<str>) -> String {
    let raw = value.as_ref();
    let mut cleaned = String::with_capacity(raw.len());

    for token in raw.split_whitespace() {
        // Tokens are never empty, so an empty buffer means this is the first one.
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(token);
    }
    cleaned
}
254
/// Turns an arbitrary name into a lowercase, hyphen-separated ASCII slug.
///
/// ASCII letters and digits are kept (lowercased). Each run of whitespace, `_`, `-`,
/// `.` or `/` between kept characters becomes one `-`. Every other character —
/// including all non-ASCII text — is dropped without acting as a separator. If
/// nothing is left, the slug is `source`.
fn slugify(value: &str) -> String {
    let mut slug = String::with_capacity(value.len());
    // Set when a separator has been seen since the last kept character.
    let mut pending_separator = false;

    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() {
            // Emit the separator lazily so leading/trailing ones never appear.
            if pending_separator && !slug.is_empty() {
                slug.push('-');
            }
            pending_separator = false;
            slug.push(ch.to_ascii_lowercase());
        } else if ch.is_whitespace() || matches!(ch, '_' | '-' | '.' | '/') {
            pending_separator = true;
        }
    }

    if slug.is_empty() {
        return "source".to_string();
    }
    slug
}