use serde::{Deserialize, Serialize};
use serde_json::Value;
/// A single dictionary entry: one headword plus everything known about it
/// from one [`DictSource`].
///
/// `DictEntry::new` fills in `id`, `word`, `word_lower` and `source`; every
/// other field starts empty (`None`, `0`, `false`, empty `Vec`, or
/// `Value::Null`) and is expected to be populated by the caller.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DictEntry {
/// Identifier built by `DictEntry::new` as `"<source slug>:<word_lower>"`.
pub id: String,
/// The headword exactly as passed to `DictEntry::new`.
pub word: String,
/// `word.to_lowercase()`, as computed by `DictEntry::new`.
pub word_lower: String,
/// Phonetic transcription, presumably British pronunciation (inferred from
/// the name). Preferred over `phonetic_us` by `DictEntry::phonetic`.
pub phonetic_uk: Option<String>,
/// Phonetic transcription, presumably American pronunciation (inferred from
/// the name). Used by `DictEntry::phonetic` when `phonetic_uk` is missing or blank.
pub phonetic_us: Option<String>,
/// Definitions of the word; scanned in order by `primary_translation` and
/// `primary_definition`.
pub definitions: Vec<Definition>,
/// Part-of-speech labels for the entry. NOTE(review): presumably normalized
/// with `clean_pos` by the producers; not enforced here.
pub pos: Vec<String>,
/// Presumably the Collins star rating (inferred from the name); `0` by default.
pub collins_star: u8,
/// Presumably whether the word is on the Oxford 3000 list (inferred from the
/// name); `false` by default.
pub oxford_3000: bool,
/// Free-form tags. NOTE(review): presumably canonicalized with
/// `normalize_tag` by the producers; confirm.
pub tags: Vec<String>,
/// Presumably a BNC corpus frequency figure (inferred from the name); TODO
/// confirm whether this is a rank or a count.
pub freq_bnc: Option<u32>,
/// Presumably a COCA corpus frequency figure (inferred from the name); TODO
/// confirm whether this is a rank or a count.
pub freq_coca: Option<u32>,
/// `(kind, word)` pairs; not included in `all_text`.
pub exchanges: Vec<Exchange>,
/// Example pairs; both halves are included in `all_text`.
pub examples: Vec<Example>,
/// Synonym groups; their meaning and words are included in `all_text`.
pub synonyms: Vec<Synonym>,
/// Phrase pairs; both halves are included in `all_text`.
pub phrases: Vec<Phrase>,
/// Related words grouped by part of speech; included in `all_text`.
pub related_words: Vec<RelatedWord>,
/// Optional mnemonic text; included in `all_text` when present.
pub mnemonic: Option<String>,
/// Where this entry came from; its slug is the prefix of `id`.
pub source: DictSource,
/// Arbitrary additional JSON data; `Value::Null` by default.
pub extra: Value,
}
impl DictEntry {
    /// Creates an otherwise empty entry for `word` coming from `source`.
    ///
    /// `word_lower` is set to `word.to_lowercase()` and `id` to
    /// `"<source.slug()>:<word_lower>"`. All remaining fields start out
    /// empty (`None`, `0`, `false`, empty vectors, `Value::Null`).
    pub fn new(source: DictSource, word: impl Into<String>) -> Self {
        let word = word.into();
        let word_lower = word.to_lowercase();
        let id = format!("{}:{}", source.slug(), word_lower);
        Self {
            id,
            word,
            word_lower,
            phonetic_uk: None,
            phonetic_us: None,
            definitions: Vec::new(),
            pos: Vec::new(),
            collins_star: 0,
            oxford_3000: false,
            tags: Vec::new(),
            freq_bnc: None,
            freq_coca: None,
            exchanges: Vec::new(),
            examples: Vec::new(),
            synonyms: Vec::new(),
            phrases: Vec::new(),
            related_words: Vec::new(),
            mnemonic: None,
            source,
            extra: Value::Null,
        }
    }

    /// Concatenates the entry's textual content into one space-separated string.
    ///
    /// Included, in order: `word`, `word_lower`, the UK and US phonetics (when
    /// present), every definition (`pos`, `zh`, `en`), every example and
    /// phrase (`en`, `zh`), every synonym group (`zh_meaning`, then its
    /// words), every related-word group (`pos`, then each word and its
    /// translation), and finally the mnemonic (when present).
    ///
    /// The entry-level `pos`, `tags` and `exchanges` are not included. Empty
    /// strings are not filtered out, so the result may contain consecutive
    /// spaces.
    pub fn all_text(&self) -> String {
        // Borrow every piece: the only allocation needed is the joined result.
        let mut parts: Vec<&str> = Vec::new();
        parts.push(&self.word);
        parts.push(&self.word_lower);
        if let Some(value) = self.phonetic_uk.as_deref() {
            parts.push(value);
        }
        if let Some(value) = self.phonetic_us.as_deref() {
            parts.push(value);
        }
        for definition in &self.definitions {
            // A definition without a part of speech still contributes an
            // (empty) slot, which keeps the output layout stable.
            parts.push(definition.pos.as_deref().unwrap_or(""));
            parts.push(&definition.zh);
            parts.push(&definition.en);
        }
        for example in &self.examples {
            parts.push(&example.en);
            parts.push(&example.zh);
        }
        for phrase in &self.phrases {
            parts.push(&phrase.en);
            parts.push(&phrase.zh);
        }
        for synonym in &self.synonyms {
            parts.push(&synonym.zh_meaning);
            parts.extend(synonym.words.iter().map(String::as_str));
        }
        for related in &self.related_words {
            parts.push(&related.pos);
            for word in &related.words {
                parts.push(&word.word);
                parts.push(&word.translation);
            }
        }
        if let Some(value) = self.mnemonic.as_deref() {
            parts.push(value);
        }
        parts.join(" ")
    }

    /// Returns the trimmed `zh` text of the first definition whose `zh` is
    /// not blank, or `None` if there is no such definition.
    pub fn primary_translation(&self) -> Option<&str> {
        self.definitions
            .iter()
            .map(|definition| definition.zh.trim())
            .find(|value| !value.is_empty())
    }

    /// Returns the trimmed `en` text of the first definition whose `en` is
    /// not blank, or `None` if there is no such definition.
    pub fn primary_definition(&self) -> Option<&str> {
        self.definitions
            .iter()
            .map(|definition| definition.en.trim())
            .find(|value| !value.is_empty())
    }

    /// Returns the preferred phonetic transcription.
    ///
    /// `phonetic_uk` wins when it is present and not blank; otherwise
    /// `phonetic_us` is used under the same condition. The returned string is
    /// the stored value as-is (it is not trimmed).
    pub fn phonetic(&self) -> Option<&str> {
        self.phonetic_uk
            .as_deref()
            .filter(|value| !value.trim().is_empty())
            .or_else(|| {
                self.phonetic_us
                    .as_deref()
                    .filter(|value| !value.trim().is_empty())
            })
    }
}
/// One definition of a word, stored as a pair of texts plus an optional
/// part-of-speech label.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Definition {
/// The text read by `DictEntry::primary_definition`; presumably the English
/// wording (inferred from the field name).
pub en: String,
/// The text read by `DictEntry::primary_translation`; presumably the Chinese
/// wording (inferred from the field name).
pub zh: String,
/// Optional part-of-speech label. `Definition::new` normalizes it with
/// `clean_pos` and stores `None` when the result is empty.
pub pos: Option<String>,
}
impl Definition {
pub fn new(en: impl Into<String>, zh: impl Into<String>, pos: Option<String>) -> Self {
Self {
en: clean_text(en.into()),
zh: clean_text(zh.into()),
pos: pos.map(clean_pos).filter(|value| !value.is_empty()),
}
}
}
/// An example attached to a [`DictEntry`], stored as a pair of texts.
///
/// Both fields are included in `DictEntry::all_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Example {
/// Presumably the English sentence (inferred from the field name).
pub en: String,
/// Presumably its Chinese translation (inferred from the field name).
pub zh: String,
}
/// A phrase attached to a [`DictEntry`], stored as a pair of texts.
///
/// Both fields are included in `DictEntry::all_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Phrase {
/// Presumably the English phrase (inferred from the field name).
pub en: String,
/// Presumably its Chinese translation (inferred from the field name).
pub zh: String,
}
/// A group of synonyms that share one meaning.
///
/// `DictEntry::all_text` includes `zh_meaning` and `words`, but not `pos`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Synonym {
/// Optional part-of-speech label for the group.
pub pos: Option<String>,
/// The shared meaning; presumably written in Chinese (inferred from the
/// field name).
pub zh_meaning: String,
/// The synonymous words in this group.
pub words: Vec<String>,
}
/// A group of related words filed under one part-of-speech label.
///
/// `DictEntry::all_text` includes `pos` followed by every item's word and
/// translation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWord {
/// Part-of-speech label for the group.
pub pos: String,
/// The related words in this group.
pub words: Vec<RelatedWordItem>,
}
/// One related word together with its translation; an element of
/// [`RelatedWord::words`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWordItem {
/// The related word itself.
pub word: String,
/// Translation of `word`. NOTE(review): presumably Chinese, like the other
/// translations in this file; confirm.
pub translation: String,
}
/// A `(kind, word)` pair stored in [`DictEntry::exchanges`].
///
/// NOTE(review): presumably a word-form record, with `kind` naming the form
/// (e.g. a tense or plural marker) and `word` holding the inflected spelling;
/// the set of valid `kind` values is not defined in this file. TODO confirm.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Exchange {
/// The kind of exchange; free-form string.
pub kind: String,
/// The word associated with that kind.
pub word: String,
}
/// Identifies where a [`DictEntry`] came from.
///
/// Serialized with serde's internally tagged representation: the variant is
/// written to a `"type"` field using its snake_case name (`"ecdict"`,
/// `"anki"`, `"sqlite"`, `"mdx"`, `"custom"`), alongside the variant's own
/// fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum DictSource {
/// The ECDICT source; carries no extra data.
Ecdict,
/// An Anki source, identified by its deck name.
Anki { deck_name: String },
/// A SQLite source, identified by a name and a table.
Sqlite { name: String, table: String },
/// An MDX source, identified by its file name.
Mdx { filename: String },
/// Any other source, identified by a free-form name.
Custom { name: String },
}
impl DictSource {
    /// Returns a slug identifying this source.
    ///
    /// `Ecdict` is always `"ecdict"`. `Anki`, `Sqlite` and `Mdx` prefix the
    /// slugified variant data with `anki-`, `sqlite-` and `mdx-`
    /// respectively (`Sqlite` joins name and table with a dash). `Custom` is
    /// just the slugified name, with no prefix.
    pub fn slug(&self) -> String {
        match self {
            Self::Ecdict => String::from("ecdict"),
            Self::Anki { deck_name } => {
                let deck = slugify(deck_name);
                format!("anki-{deck}")
            }
            Self::Sqlite { name, table } => {
                let db = slugify(name);
                let tbl = slugify(table);
                format!("sqlite-{db}-{tbl}")
            }
            Self::Mdx { filename } => {
                let file = slugify(filename);
                format!("mdx-{file}")
            }
            Self::Custom { name } => slugify(name),
        }
    }

    /// Returns a human-readable name for this source.
    ///
    /// `Ecdict` is `"ECDICT"`, `Sqlite` is `"<name>:<table>"`, and the
    /// remaining variants return their single string field unchanged.
    pub fn display_name(&self) -> String {
        match self {
            Self::Ecdict => String::from("ECDICT"),
            Self::Sqlite { name, table } => format!("{name}:{table}"),
            Self::Anki { deck_name: label }
            | Self::Mdx { filename: label }
            | Self::Custom { name: label } => label.clone(),
        }
    }
}
/// Normalizes a part-of-speech label to a short, lowercase abbreviation.
///
/// Surrounding whitespace and trailing `.` characters are removed and the
/// label is ASCII-lowercased. Spelled-out names and known aliases then
/// collapse to one canonical form:
///
/// | input                               | output |
/// |-------------------------------------|--------|
/// | `noun`, `n`                         | `n`    |
/// | `verb`, `v`                         | `v`    |
/// | `adjective`, `adj`, `a`             | `adj`  |
/// | `adverb`, `adv`                     | `adv`  |
/// | `preposition`, `prep`               | `prep` |
/// | `conjunction`, `conj`               | `conj` |
/// | `pronoun`, `pron`                   | `pron` |
/// | `interjection`, `interj`, `int`     | `int`  |
/// | `article`, `art`                    | `art`  |
///
/// Any other label (including `vi` and `vt`) is returned in its trimmed,
/// lowercased form. An input consisting only of whitespace and dots yields
/// an empty string.
pub fn clean_pos(value: impl AsRef<str>) -> String {
    // Trim once more after dropping the dots so that "adj ." still matches.
    let label = value.as_ref().trim().trim_end_matches('.').trim_end();
    let lower = label.to_ascii_lowercase();
    let canonical = match lower.as_str() {
        "noun" | "n" => Some("n"),
        "verb" | "v" => Some("v"),
        "adjective" | "adj" | "a" => Some("adj"),
        "adverb" | "adv" => Some("adv"),
        "preposition" | "prep" => Some("prep"),
        "conjunction" | "conj" => Some("conj"),
        "pronoun" | "pron" => Some("pron"),
        "interjection" | "interj" | "int" => Some("int"),
        "article" | "art" => Some("art"),
        _ => None,
    };
    match canonical {
        Some(short) => short.to_owned(),
        // Unknown labels are already in their final form.
        None => lower,
    }
}
/// Maps a tag to its canonical spelling.
///
/// The input is trimmed and ASCII-lowercased, then looked up in a small
/// alias table. Tags that are not listed there are returned in their
/// trimmed, lowercased form.
pub fn normalize_tag(value: impl AsRef<str>) -> String {
    // (accepted spellings, canonical tag)
    const ALIASES: &[(&[&str], &str)] = &[
        (&["zk", "zhongkao", "中考"], "zk"),
        (&["gk", "gaokao", "高考"], "gk"),
        (&["ky", "kaoyan", "考研"], "kao_yan"),
        (&["cet-4", "cet_4"], "cet4"),
        (&["cet-6", "cet_6"], "cet6"),
    ];

    let lowered = value.as_ref().trim().to_ascii_lowercase();
    let hit = ALIASES
        .iter()
        .find(|(spellings, _)| spellings.contains(&lowered.as_str()));
    match hit {
        Some((_, canonical)) => (*canonical).to_owned(),
        None => lowered,
    }
}
/// Collapses every run of whitespace in `value` into a single space and
/// removes leading and trailing whitespace.
///
/// "Whitespace" follows `str::split_whitespace`, i.e. Unicode whitespace, so
/// tabs, newlines and full-width spaces are all normalized. An input that is
/// empty or all whitespace yields an empty string.
pub fn clean_text(value: impl AsRef<str>) -> String {
    // `split_whitespace` never yields empty pieces, so the joined string has
    // no leading or trailing space and needs no further trimming.
    value
        .as_ref()
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}
/// Converts an arbitrary name into a lowercase, dash-separated slug.
///
/// * Alphanumeric characters are kept and lowercased. The check is
///   Unicode-aware, so non-ASCII names (for example Chinese deck names) keep
///   their letters instead of all collapsing to the same fallback slug.
/// * Whitespace and the characters `_`, `-`, `.`, `/` act as separators; any
///   run of them becomes a single `-`.
/// * Every other character is dropped.
/// * Leading and trailing dashes are removed.
///
/// If nothing is left, the fallback slug `"source"` is returned.
fn slugify(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch.is_alphanumeric() {
            // `to_lowercase` may expand to several chars for some scripts.
            out.extend(ch.to_lowercase());
        } else if (ch.is_whitespace() || matches!(ch, '_' | '-' | '.' | '/'))
            && !out.ends_with('-')
        {
            out.push('-');
        }
    }
    let trimmed = out.trim_matches('-');
    if trimmed.is_empty() {
        "source".to_string()
    } else {
        trimmed.to_string()
    }
}