1use serde::Deserialize;
2
/// How a [`LookupResult`] was matched; stored in `LookupResult::match_type`.
///
/// The enum carries no data, so it is `Copy` and can be used as a hash-map or
/// hash-set key.
///
/// NOTE(review): the per-variant descriptions are inferred from the variant
/// names only — confirm against the lookup implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchType {
    /// Presumably: the query equals the matched key.
    Exact,
    /// Presumably: the matched key starts with the query.
    Prefix,
    /// Presumably: matched after reducing an inflected query to a base form.
    Deinflected,
    /// Presumably: approximate (error-tolerant) match.
    Fuzzy,
    /// Presumably: matched against gloss (translation) text.
    Gloss,
}
13
/// Selector for a matching strategy.
///
/// The enum carries no data, so it is `Copy` and can be used as a hash-map or
/// hash-set key.
///
/// NOTE(review): nothing in this file consumes `MatchMode`; presumably it is
/// the mode a caller requests for a lookup. The per-variant descriptions are
/// inferred from the names only — confirm against the lookup implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MatchMode {
    /// Presumably: only keys equal to the query match.
    Exact,
    /// Presumably: keys starting with the query match.
    Prefix,
    /// Presumably: the query is deinflected before matching.
    Deinflect,
    /// Presumably: approximate (error-tolerant) matching.
    Fuzzy,
}
26
/// Details attached to a [`LookupResult`] through its optional `deinflection`
/// field.
///
/// NOTE(review): field meanings below are inferred from the field names —
/// confirm against the code that fills this struct.
#[derive(Clone, Debug)]
pub struct DeinflectionInfo {
    /// Presumably the form as it appeared in the query.
    pub original_form: String,
    /// Presumably the dictionary (base) form the query was reduced to.
    pub base_form: String,
    /// Rule identifiers; presumably the deinflection rules that were applied
    /// (ordering not visible here).
    pub rules: Vec<String>,
}
34
35#[derive(Debug, Clone)]
37pub struct LookupResult {
38 pub entry: Entry,
39 pub match_type: MatchType,
40 pub match_key: String,
41 pub score: f64,
42 pub deinflection: Option<DeinflectionInfo>,
43}
44
/// Four-byte ASCII tag `JMDF`.
///
/// NOTE(review): presumably the signature at the start of the binary data
/// file — confirm against the reader/writer, which is not in this file.
pub const MAGIC: &[u8; 4] = b"JMDF";

/// Numeric version of the data format, currently `4`.
///
/// NOTE(review): presumably bumped whenever the on-disk layout changes —
/// confirm with the serializer.
pub const FORMAT_VERSION: u32 = 4;

/// Version string `"3.6.1"`.
///
/// NOTE(review): the name suggests the upstream JMdict data release this
/// crate targets — confirm.
pub const JMDICT_VERSION: &str = "3.6.1";
67
/// Version metadata describing a data set.
///
/// NOTE(review): presumably read from the data file header and comparable
/// with [`FORMAT_VERSION`] / [`JMDICT_VERSION`] — confirm with the loader.
#[derive(Clone, Debug)]
pub struct DataVersion {
    /// Data format version number.
    pub format_version: u32,
    /// JMdict version string.
    pub jmdict_version: String,
    /// Generation timestamp as text; its format is not visible in this file.
    pub generated_at: String,
}
75
76#[derive(Debug, Deserialize, Clone)]
77pub struct Entry {
78 pub id: String,
79 pub kanji: Vec<KanjiEntry>,
80 pub kana: Vec<KanaEntry>,
81 pub sense: Vec<SenseEntry>,
82}
83
84impl Entry {
85 pub fn primary_kanji(&self) -> Option<&str> {
88 self.kanji.first().map(|k| k.text.as_str())
89 }
90
91 pub fn primary_kana(&self) -> Option<&str> {
95 self.kana.first().map(|k| k.text.as_str())
96 }
97
98 pub fn headword(&self) -> Option<&str> {
100 self.primary_kanji().or_else(|| self.primary_kana())
101 }
102
103 pub fn is_common(&self) -> bool {
105 self.kanji.iter().any(|k| k.common) || self.kana.iter().any(|k| k.common)
106 }
107
108 pub fn glosses<'a>(&'a self, lang: &'a str) -> impl Iterator<Item = &'a str> + 'a {
110 self.sense
111 .iter()
112 .flat_map(move |s| s.gloss.iter())
113 .filter(move |g| g.lang == lang)
114 .map(|g| g.text.as_str())
115 }
116
117 pub fn parts_of_speech(&self) -> Vec<&str> {
119 let mut seen = Vec::new();
120 for s in &self.sense {
121 for p in &s.part_of_speech {
122 if !seen.iter().any(|x: &&str| *x == p.as_str()) {
123 seen.push(p.as_str());
124 }
125 }
126 }
127 seen
128 }
129}
130
131#[derive(Debug, Deserialize, Clone)]
132pub struct KanjiEntry {
133 pub common: bool,
134 pub text: String,
135 pub tags: Vec<String>,
136}
137
138#[derive(Debug, Deserialize, Clone)]
139pub struct KanaEntry {
140 pub common: bool,
141 pub text: String,
142 pub tags: Vec<String>,
143 #[serde(rename = "appliesToKanji")]
144 pub applies_to_kanji: Vec<String>,
145}
146
147#[derive(Debug, Deserialize, Clone)]
148pub struct Xref {
149 pub term: String,
150 pub reading: Option<String>,
151 pub sense_index: Option<u32>,
152}
153
154#[derive(Debug, Deserialize, Clone)]
155pub struct LanguageSource {
156 pub lang: String,
157 pub full: bool,
158 pub wasei: bool,
159 pub text: Option<String>,
160}
161
162#[derive(Debug, Deserialize, Clone)]
163pub struct SenseEntry {
164 #[serde(rename = "partOfSpeech")]
165 pub part_of_speech: Vec<String>,
166 #[serde(rename = "appliesToKanji")]
167 pub applies_to_kanji: Vec<String>,
168 #[serde(rename = "appliesToKana")]
169 pub applies_to_kana: Vec<String>,
170 pub related: Vec<Xref>,
171 pub antonym: Vec<Xref>,
172 pub field: Vec<String>,
173 pub dialect: Vec<String>,
174 pub misc: Vec<String>,
175 pub info: Vec<String>,
176 pub language_source: Vec<LanguageSource>,
177 pub gloss: Vec<GlossEntry>,
178}
179
180#[derive(Debug, Deserialize, Clone)]
181pub struct GlossEntry {
182 pub lang: String,
183 pub gender: Option<String>,
184 #[serde(rename = "type")]
185 pub gloss_type: Option<String>,
186 pub text: String,
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Gloss with only a language code and text set.
    fn make_gloss(lang: &str, text: &str) -> GlossEntry {
        GlossEntry {
            lang: lang.to_owned(),
            gender: None,
            gloss_type: None,
            text: text.to_owned(),
        }
    }

    /// Sense with the given part-of-speech tags and glosses; all other lists empty.
    fn make_sense(pos: &[&str], glosses: Vec<GlossEntry>) -> SenseEntry {
        SenseEntry {
            part_of_speech: pos.iter().map(|tag| String::from(*tag)).collect(),
            applies_to_kanji: vec![],
            applies_to_kana: vec![],
            related: vec![],
            antonym: vec![],
            field: vec![],
            dialect: vec![],
            misc: vec![],
            info: vec![],
            language_source: vec![],
            gloss: glosses,
        }
    }

    /// Kanji form without tags.
    fn kanji_form(text: &str, common: bool) -> KanjiEntry {
        KanjiEntry {
            common,
            text: text.to_owned(),
            tags: vec![],
        }
    }

    /// Kana form without tags or kanji restrictions.
    fn kana_form(text: &str, common: bool) -> KanaEntry {
        KanaEntry {
            common,
            text: text.to_owned(),
            tags: vec![],
            applies_to_kanji: vec![],
        }
    }

    /// Entry with a fixed id and the supplied forms and senses.
    fn make_entry(
        kanji: Vec<KanjiEntry>,
        kana: Vec<KanaEntry>,
        sense: Vec<SenseEntry>,
    ) -> Entry {
        Entry {
            id: String::from("test"),
            kanji,
            kana,
            sense,
        }
    }

    #[test]
    fn primary_kanji_and_kana_first_element() {
        let kanji_forms = vec![kanji_form("猫", true), kanji_form("ねこ", false)];
        let kana_forms = vec![kana_form("ねこ", true)];
        let e = make_entry(kanji_forms, kana_forms, Vec::new());

        assert_eq!(e.primary_kanji(), Some("猫"));
        assert_eq!(e.primary_kana(), Some("ねこ"));
    }

    #[test]
    fn primary_kanji_none_when_kana_only() {
        let e = make_entry(Vec::new(), vec![kana_form("にゃんこ", false)], Vec::new());

        assert_eq!(e.primary_kanji(), None);
        assert_eq!(e.primary_kana(), Some("にゃんこ"));
    }

    #[test]
    fn headword_prefers_kanji_falls_back_to_kana() {
        let with_kanji = make_entry(
            vec![kanji_form("猫", false)],
            vec![kana_form("ねこ", false)],
            Vec::new(),
        );
        assert_eq!(with_kanji.headword(), Some("猫"));

        let kana_only = make_entry(Vec::new(), vec![kana_form("にゃんこ", false)], Vec::new());
        assert_eq!(kana_only.headword(), Some("にゃんこ"));

        let empty = make_entry(Vec::new(), Vec::new(), Vec::new());
        assert_eq!(empty.headword(), None);
    }

    #[test]
    fn is_common_true_if_any_form_is_common() {
        // (kanji common?, kana common?, expected result)
        let cases = [
            (true, false, true),
            (false, true, true),
            (false, false, false),
        ];
        for &(kanji_common, kana_common, expected) in cases.iter() {
            let e = make_entry(
                vec![kanji_form("猫", kanji_common)],
                vec![kana_form("ねこ", kana_common)],
                Vec::new(),
            );
            assert_eq!(e.is_common(), expected);
        }
    }

    #[test]
    fn glosses_filter_by_lang() {
        let glosses = vec![
            make_gloss("eng", "cat"),
            make_gloss("fre", "chat"),
            make_gloss("eng", "feline"),
        ];
        let e = make_entry(
            Vec::new(),
            vec![kana_form("ねこ", false)],
            vec![make_sense(&["n"], glosses)],
        );

        assert_eq!(e.glosses("eng").collect::<Vec<&str>>(), ["cat", "feline"]);
        assert_eq!(e.glosses("fre").collect::<Vec<&str>>(), ["chat"]);
        assert_eq!(e.glosses("jpn").count(), 0);
    }

    #[test]
    fn parts_of_speech_dedup_in_first_seen_order() {
        let senses = vec![
            make_sense(&["v1", "vt"], Vec::new()),
            make_sense(&["vt", "vi"], Vec::new()),
            make_sense(&["v1"], Vec::new()),
        ];
        let e = make_entry(Vec::new(), vec![kana_form("ねこ", false)], senses);

        assert_eq!(e.parts_of_speech(), ["v1", "vt", "vi"]);
    }
}