use serde::{Deserialize, Serialize};
use crate::tags::{BLACKLISTED_FORM_TAGS, IDENTITY_FORM_TAGS};
/// Label type used by the `tags` and `topics` lists in this file; an alias for `String`.
pub type Tag = String;
/// A single word entry record, (de)serializable with serde and archivable with rkyv.
///
/// Because of `#[serde(default)]`, fields missing from the input are filled from
/// `WordEntry::default()` (empty strings / empty vectors) instead of failing.
///
/// NOTE(review): the field set looks like it mirrors an externally produced
/// dictionary JSON record — confirm against the producer before renaming or
/// reordering fields, since names are the serialized keys.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct WordEntry {
/// Text of the entry; `non_trivial_forms` skips forms whose text equals it.
pub word: String,
/// Presumably the part-of-speech label; `is_participle` compares it against `"verb"`.
pub pos: String,
/// Language code of the entry (exact code format is not visible in this file).
pub lang_code: String,
/// Head templates attached to the entry.
pub head_templates: Vec<HeadTemplate>,
// Both etymology fields are private and read only through `etymology_texts()`,
// which prefers the list and falls back to the single string.
etymology_text: String, etymology_texts: Vec<String>,
/// Sound records; `pinyin` searches these for one tagged `"Pinyin"`.
pub sounds: Vec<Sound>,
/// Senses of the entry; `contains_no_gloss` inspects their glosses.
pub senses: Vec<Sense>,
/// Entry-level tags; `is_participle` looks for `"participle"` here.
pub tags: Vec<Tag>,
/// Entry-level topic labels.
pub topics: Vec<Tag>,
/// Forms of the entry; filtered by `tagged_forms` and `non_trivial_forms`.
pub forms: Vec<Form>,
/// Presumably the words this entry is a form of — confirm against the data source.
pub form_of: Vec<AltForm>,
/// Presumably the words this entry is an alternative of — confirm against the data source.
pub alt_of: Vec<AltForm>,
/// Synonyms listed for the entry.
pub synonyms: Vec<Synonym>,
/// Translations listed for the entry; see `non_trivial_translations`.
pub translations: Vec<Translation>, }
/// One element of `WordEntry::head_templates`.
///
/// A missing `expansion` deserializes to an empty string because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct HeadTemplate {
/// Presumably the rendered (expanded) text of the template — confirm against the data source.
pub expansion: String,
}
/// One element of `WordEntry::sounds`.
///
/// Missing fields deserialize to their defaults because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Sound {
/// Presumably an IPA transcription, going by the name — confirm against the data source.
pub ipa: String,
/// Tags on this sound; `WordEntry::pinyin` looks for the tag `"Pinyin"` here.
pub tags: Vec<Tag>,
/// Free-form note; nothing in this file reads it.
pub note: String,
/// String returned by `WordEntry::pinyin` when this sound is tagged `"Pinyin"`.
pub zh_pron: String,
}
/// One element of `WordEntry::senses`.
///
/// Missing fields deserialize to their defaults because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Sense {
/// Gloss strings; `WordEntry::contains_no_gloss` checks whether this list is empty.
pub glosses: Vec<String>,
/// Usage examples attached to this sense.
pub examples: Vec<Example>,
/// Presumably the words this sense is a form of — confirm against the data source.
pub form_of: Vec<AltForm>,
/// Presumably the words this sense is an alternative of — confirm against the data source.
pub alt_of: Vec<AltForm>,
/// Sense-level tags.
pub tags: Vec<Tag>,
/// Sense-level topic labels.
pub topics: Vec<Tag>,
}
/// Pair of `usize` values used by the bold-span lists on `Example`.
///
/// NOTE(review): presumably `(start, end)`; whether these are byte or character
/// positions is not visible in this file — confirm with the producer.
pub type Offset = (usize, usize);
/// A usage example, stored in `Sense::examples`.
///
/// Missing fields deserialize to their defaults because of `#[serde(default)]`;
/// every field except `text` is left out of the serialized output when empty.
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Example {
/// Example text; always serialized, even when empty.
pub text: String,
/// Presumably a translation of `text`; omitted from serialized output when empty.
#[serde(skip_serializing_if = "String::is_empty")]
pub translation: String,
/// Read and written under the serialized key `ref`; omitted from output when empty.
#[serde(skip_serializing_if = "String::is_empty")]
#[serde(rename = "ref")]
pub reference: String, #[serde(skip_serializing_if = "Vec::is_empty")]
/// Presumably the bold spans within `text`; omitted from output when empty
/// (the `skip_serializing_if` attribute trailing the previous line belongs to this field).
pub bold_text_offsets: Vec<Offset>,
/// Presumably the bold spans within `translation`; omitted from output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub bold_translation_offsets: Vec<Offset>, }
/// Reference to another word by its text; element type of the `form_of` and
/// `alt_of` lists on `WordEntry` and `Sense`.
///
/// A missing `word` deserializes to an empty string because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct AltForm {
/// Text of the referenced word.
pub word: String,
}
/// One element of `WordEntry::forms`.
///
/// Missing fields deserialize to their defaults because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Form {
/// Text of the form; the `WordEntry` form helpers skip forms where this is empty.
pub form: String,
/// Tags describing the form (e.g. `"canonical"`, `"romanization"`, `"transliteration"`
/// are the ones `WordEntry` looks up).
pub tags: Vec<Tag>,
/// Pairs of strings — presumably (base text, ruby annotation); confirm the order
/// against the data source. Omitted from serialized output when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub ruby: Vec<(String, String)>, }
/// One element of `WordEntry::synonyms`.
///
/// A missing `word` deserializes to an empty string because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default, Clone)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Synonym {
/// Text of the synonym.
pub word: String,
}
/// One element of `WordEntry::translations`.
///
/// Missing fields deserialize to their defaults because of `#[serde(default)]`.
#[derive(Debug, Deserialize, Serialize, Default)]
#[serde(default)]
#[derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Translation {
/// Presumably the code of the language translated into — confirm against the data source.
pub lang_code: String,
/// Translated text; `WordEntry::non_trivial_translations` skips entries where this is empty.
pub word: String,
/// Presumably the sense this translation applies to; omitted from serialized output when empty.
#[serde(skip_serializing_if = "String::is_empty")]
pub sense: String,
}
impl WordEntry {
    /// Returns `true` when `pos` is `"verb"` and one of the entry-level tags is
    /// `"participle"`.
    pub fn is_participle(&self) -> bool {
        if self.pos != "verb" {
            return false;
        }
        self.tags.iter().any(|tag| tag == "participle")
    }

    /// Iterates, in stored order, over the forms whose text is non-empty and that
    /// carry every tag listed in `tags`.
    ///
    /// An empty `tags` slice therefore matches every form with non-empty text.
    fn tagged_forms<'a>(&'a self, tags: &[&str]) -> impl Iterator<Item = &'a Form> {
        self.forms.iter().filter(move |candidate| {
            if candidate.form.is_empty() {
                return false;
            }
            tags.iter()
                .all(|wanted| candidate.tags.iter().any(|present| present == wanted))
        })
    }

    /// First form with non-empty text tagged `"canonical"`, if any.
    pub fn canonical_form(&self) -> Option<&Form> {
        let mut matches = self.tagged_forms(&["canonical"]);
        matches.next()
    }

    /// First form with non-empty text tagged `"romanization"`, if any.
    pub fn romanization_form(&self) -> Option<&Form> {
        let mut matches = self.tagged_forms(&["romanization"]);
        matches.next()
    }

    /// First form with non-empty text tagged `"transliteration"`, if any.
    pub fn transliteration_form(&self) -> Option<&Form> {
        let mut matches = self.tagged_forms(&["transliteration"]);
        matches.next()
    }

    /// Returns the `zh_pron` of the first sound tagged `"Pinyin"`.
    ///
    /// Yields `None` when no sound carries that tag. The returned string is not
    /// checked for emptiness.
    pub fn pinyin(&self) -> Option<&str> {
        self.sounds
            .iter()
            .find(|sound| sound.tags.iter().any(|tag| tag == "Pinyin"))
            .map(|sound| sound.zh_pron.as_str())
    }

    /// Returns `true` when no sense has any gloss — which includes the case of an
    /// entry with no senses at all.
    pub fn contains_no_gloss(&self) -> bool {
        !self.senses.iter().any(|sense| !sense.glosses.is_empty())
    }

    /// Iterates over the forms worth keeping, skipping a form when any of these hold:
    ///
    /// * its text is empty or equal to `self.word`;
    /// * its text starts with one of the two hyphen characters below;
    /// * any of its tags is in `BLACKLISTED_FORM_TAGS`;
    /// * all of its tags are in `IDENTITY_FORM_TAGS`.
    ///
    /// Note that a form with no tags at all satisfies the last rule (`all` over an
    /// empty list is `true`) and is skipped as well.
    pub fn non_trivial_forms(&self) -> impl Iterator<Item = &Form> {
        self.forms.iter().filter(move |candidate| {
            let text = &candidate.form;
            let empty_or_same_as_word = text.is_empty() || *text == self.word;
            // The second entry is a non-ASCII hyphen look-alike, not a duplicate of '-'.
            let starts_with_hyphen = text.starts_with(['-', '‑']);
            if empty_or_same_as_word || starts_with_hyphen {
                return false;
            }

            let has_blacklisted_tag = candidate
                .tags
                .iter()
                .any(|tag| BLACKLISTED_FORM_TAGS.contains(&tag.as_str()));
            let only_identity_tags = candidate
                .tags
                .iter()
                .all(|tag| IDENTITY_FORM_TAGS.contains(&tag.as_str()));

            !(has_blacklisted_tag || only_identity_tags)
        })
    }

    /// Iterates over the translations whose `word` is non-empty.
    pub fn non_trivial_translations(&self) -> impl Iterator<Item = &Translation> {
        self.translations
            .iter()
            .filter(|entry| !entry.word.is_empty())
    }

    /// Returns the etymology texts of this entry as borrowed strings.
    ///
    /// The `etymology_texts` list wins when it is non-empty; otherwise a non-empty
    /// `etymology_text` is returned as a one-element vector; otherwise `None`.
    pub fn etymology_texts(&self) -> Option<Vec<&str>> {
        match (
            self.etymology_texts.is_empty(),
            self.etymology_text.is_empty(),
        ) {
            (false, _) => Some(self.etymology_texts.iter().map(String::as_str).collect()),
            (true, false) => Some(vec![self.etymology_text.as_str()]),
            (true, true) => None,
        }
    }
}