dictx-core 0.1.0

Core data types and query models for the DictX terminal dictionary.
Documentation
use serde::{Deserialize, Serialize};
use serde_json::Value;

/// A single dictionary entry: one headword plus all data attached to it.
///
/// [`DictEntry::new`] derives `id` and `word_lower` from the headword and
/// source and leaves every other field empty for the caller to fill in.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct DictEntry {
    /// Identifier; `DictEntry::new` sets it to `"<source slug>:<word_lower>"`.
    pub id: String,
    /// Headword exactly as supplied by the caller.
    pub word: String,
    /// Lowercased headword (`DictEntry::new` derives it with `str::to_lowercase`).
    pub word_lower: String,
    /// Phonetic transcription; presumably the British pronunciation — TODO confirm.
    /// `DictEntry::phonetic` prefers this field over `phonetic_us`.
    pub phonetic_uk: Option<String>,
    /// Phonetic transcription; presumably the American pronunciation — TODO confirm.
    pub phonetic_us: Option<String>,
    /// Definitions in the order they should be consulted; `primary_translation`
    /// and `primary_definition` return the first non-blank match.
    pub definitions: Vec<Definition>,
    /// Part-of-speech tags for the entry as a whole.
    pub pos: Vec<String>,
    /// Presumably the Collins star rating; `DictEntry::new` initialises it to 0.
    pub collins_star: u8,
    /// Presumably whether the word is on the Oxford 3000 list; `false` by default.
    pub oxford_3000: bool,
    /// Free-form tags (see `normalize_tag` for the canonical spellings).
    pub tags: Vec<String>,
    /// Presumably a BNC corpus frequency figure — TODO confirm rank vs. count.
    pub freq_bnc: Option<u32>,
    /// Presumably a COCA corpus frequency figure — TODO confirm rank vs. count.
    pub freq_coca: Option<u32>,
    /// Related word forms; not included in `DictEntry::all_text`.
    pub exchanges: Vec<Exchange>,
    /// Example sentences.
    pub examples: Vec<Example>,
    /// Synonym groups.
    pub synonyms: Vec<Synonym>,
    /// Phrases containing or related to the headword.
    pub phrases: Vec<Phrase>,
    /// Related words grouped by part of speech.
    pub related_words: Vec<RelatedWord>,
    /// Optional memory aid for the word.
    pub mnemonic: Option<String>,
    /// Dictionary source this entry came from; its slug prefixes `id`.
    pub source: DictSource,
    /// Arbitrary additional JSON data; `Value::Null` when `DictEntry::new` builds the entry.
    pub extra: Value,
}

impl DictEntry {
    /// Creates an otherwise empty entry for `word` belonging to `source`.
    ///
    /// `word_lower` is the Unicode-lowercased form of `word`, and `id` is
    /// `"<source slug>:<word_lower>"`. Every other field starts out as `None`,
    /// an empty `Vec`, `0`, `false`, or `Value::Null`.
    pub fn new(source: DictSource, word: impl Into<String>) -> Self {
        let word = word.into();
        let word_lower = word.to_lowercase();
        let id = format!("{}:{}", source.slug(), word_lower);

        Self {
            id,
            word,
            word_lower,
            phonetic_uk: None,
            phonetic_us: None,
            definitions: Vec::new(),
            pos: Vec::new(),
            collins_star: 0,
            oxford_3000: false,
            tags: Vec::new(),
            freq_bnc: None,
            freq_coca: None,
            exchanges: Vec::new(),
            examples: Vec::new(),
            synonyms: Vec::new(),
            phrases: Vec::new(),
            related_words: Vec::new(),
            mnemonic: None,
            source,
            extra: Value::Null,
        }
    }

    /// Flattens the entry's textual fields into one space-separated string.
    ///
    /// Fragments are emitted in this order: `word`, `word_lower`, both
    /// phonetics, each definition (`pos`, `zh`, `en`), each example and phrase
    /// (`en`, `zh`), each synonym group (`zh_meaning`, then its words), each
    /// related-word group (`pos`, then `word`/`translation` pairs), and the
    /// mnemonic. Blank fragments are skipped so that missing fields never
    /// produce runs of consecutive separators.
    pub fn all_text(&self) -> String {
        // Borrow every fragment; the only allocation is the final `join`.
        let mut parts: Vec<&str> = vec![self.word.as_str(), self.word_lower.as_str()];

        parts.extend(self.phonetic_uk.as_deref());
        parts.extend(self.phonetic_us.as_deref());

        for definition in &self.definitions {
            parts.extend(definition.pos.as_deref());
            parts.push(&definition.zh);
            parts.push(&definition.en);
        }
        for example in &self.examples {
            parts.push(&example.en);
            parts.push(&example.zh);
        }
        for phrase in &self.phrases {
            parts.push(&phrase.en);
            parts.push(&phrase.zh);
        }
        for synonym in &self.synonyms {
            parts.push(&synonym.zh_meaning);
            parts.extend(synonym.words.iter().map(String::as_str));
        }
        for related in &self.related_words {
            parts.push(&related.pos);
            for item in &related.words {
                parts.push(&item.word);
                parts.push(&item.translation);
            }
        }
        parts.extend(self.mnemonic.as_deref());

        // An empty or whitespace-only fragment would only add a stray separator.
        parts.retain(|part| !part.trim().is_empty());
        parts.join(" ")
    }

    /// Returns the trimmed `zh` text of the first definition whose `zh` is
    /// not blank, or `None` when there is no such definition.
    pub fn primary_translation(&self) -> Option<&str> {
        self.definitions
            .iter()
            .map(|definition| definition.zh.trim())
            .find(|value| !value.is_empty())
    }

    /// Returns the trimmed `en` text of the first definition whose `en` is
    /// not blank, or `None` when there is no such definition.
    pub fn primary_definition(&self) -> Option<&str> {
        self.definitions
            .iter()
            .map(|definition| definition.en.trim())
            .find(|value| !value.is_empty())
    }

    /// Returns the first non-blank phonetic transcription, preferring
    /// `phonetic_uk` over `phonetic_us`.
    ///
    /// The value is trimmed, matching `primary_translation` and
    /// `primary_definition`, so callers never receive surrounding whitespace.
    pub fn phonetic(&self) -> Option<&str> {
        self.phonetic_uk
            .as_deref()
            .into_iter()
            .chain(self.phonetic_us.as_deref())
            .map(str::trim)
            .find(|value| !value.is_empty())
    }
}

/// One sense of a word, holding text in two languages and an optional
/// part-of-speech tag.
///
/// [`Definition::new`] normalises all three fields; constructing the struct
/// literally stores the values as given.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Definition {
    /// Text returned by `DictEntry::primary_definition` (presumably English).
    pub en: String,
    /// Text returned by `DictEntry::primary_translation` (presumably Chinese).
    pub zh: String,
    /// Part-of-speech tag, if any.
    pub pos: Option<String>,
}

impl Definition {
    pub fn new(en: impl Into<String>, zh: impl Into<String>, pos: Option<String>) -> Self {
        Self {
            en: clean_text(en.into()),
            zh: clean_text(zh.into()),
            pos: pos.map(clean_pos).filter(|value| !value.is_empty()),
        }
    }
}

/// An example attached to an entry, stored as a pair of texts.
///
/// `DictEntry::all_text` emits `en` followed by `zh`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Example {
    /// Presumably the English sentence — TODO confirm against the importers.
    pub en: String,
    /// Presumably the Chinese translation of `en` — TODO confirm.
    pub zh: String,
}

/// A phrase attached to an entry, stored as a pair of texts.
///
/// `DictEntry::all_text` emits `en` followed by `zh`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Phrase {
    /// Presumably the English phrase — TODO confirm against the importers.
    pub en: String,
    /// Presumably the Chinese translation of `en` — TODO confirm.
    pub zh: String,
}

/// A group of synonyms sharing one meaning.
///
/// `DictEntry::all_text` includes `zh_meaning` and `words` but not `pos`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Synonym {
    /// Part-of-speech tag of the group, if any.
    pub pos: Option<String>,
    /// Meaning shared by the group (presumably in Chinese, going by the name).
    pub zh_meaning: String,
    /// The synonymous words themselves.
    pub words: Vec<String>,
}

/// Related words grouped under one part-of-speech tag.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWord {
    /// Part-of-speech tag shared by every item in `words`.
    pub pos: String,
    /// The related words in this group.
    pub words: Vec<RelatedWordItem>,
}

/// A single related word together with its translation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RelatedWordItem {
    /// The related word.
    pub word: String,
    /// Translation of `word`.
    pub translation: String,
}

/// A word form linked to an entry, labelled by `kind`.
///
/// NOTE(review): presumably an inflection record (e.g. plural or past tense);
/// the set of valid `kind` values is not defined in this file — confirm with
/// the importers. `DictEntry::all_text` does not include exchanges.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Exchange {
    /// Label describing how `word` relates to the headword.
    pub kind: String,
    /// The related word form.
    pub word: String,
}

/// Identifies which dictionary an entry came from.
///
/// Serialized by serde as an internally tagged value: the variant name in
/// snake_case is stored under the `type` key next to the variant's fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum DictSource {
    /// The ECDICT source; carries no extra data.
    Ecdict,
    /// An Anki deck, identified by its deck name.
    Anki { deck_name: String },
    /// A SQLite source, identified by a name and the table within it.
    Sqlite { name: String, table: String },
    /// An MDX source, identified by its file name.
    Mdx { filename: String },
    /// Any other source, identified by a free-form name.
    Custom { name: String },
}

impl DictSource {
    /// Returns a lowercase, dash-separated identifier for this source.
    ///
    /// `Ecdict` yields `"ecdict"`; `Anki`, `Sqlite` and `Mdx` prefix the
    /// slugified fields with `anki-`, `sqlite-` and `mdx-` respectively;
    /// `Custom` yields the slugified name with no prefix.
    pub fn slug(&self) -> String {
        match self {
            Self::Ecdict => String::from("ecdict"),
            Self::Custom { name } => slugify(name),
            Self::Anki { deck_name } => {
                let deck = slugify(deck_name);
                format!("anki-{deck}")
            }
            Self::Sqlite { name, table } => {
                let database = slugify(name);
                let table = slugify(table);
                format!("sqlite-{database}-{table}")
            }
            Self::Mdx { filename } => {
                let file = slugify(filename);
                format!("mdx-{file}")
            }
        }
    }

    /// Returns the human-readable label for this source.
    ///
    /// `Ecdict` yields `"ECDICT"`, `Sqlite` yields `"<name>:<table>"`, and the
    /// remaining variants return their single field unchanged.
    pub fn display_name(&self) -> String {
        match self {
            Self::Ecdict => String::from("ECDICT"),
            Self::Sqlite { name, table } => [name.as_str(), table.as_str()].join(":"),
            Self::Anki { deck_name: label }
            | Self::Mdx { filename: label }
            | Self::Custom { name: label } => label.to_owned(),
        }
    }
}

/// Normalises a part-of-speech label to its short canonical tag.
///
/// Surrounding whitespace and trailing periods are removed (including any
/// whitespace left in front of the periods, so `"n ."` becomes `"n"`), and the
/// result is ASCII-lowercased. Known long and short spellings are then folded
/// onto one tag: `n`, `v`, `adj`, `adv`, `prep`, `conj`, `pron`, `int`, `art`.
/// Any other label, including `vi` and `vt`, is returned lowercased and
/// otherwise unchanged.
pub fn clean_pos(value: impl AsRef<str>) -> String {
    let trimmed = value
        .as_ref()
        .trim()
        .trim_end_matches('.')
        // "n ." would otherwise keep the space that preceded the period.
        .trim_end();
    let lower = trimmed.to_ascii_lowercase();

    let canonical = match lower.as_str() {
        "noun" | "n" => "n",
        "verb" | "v" => "v",
        "adjective" | "adj" | "a" => "adj",
        "adverb" | "adv" => "adv",
        "preposition" | "prep" => "prep",
        "conjunction" | "conj" => "conj",
        "pronoun" | "pron" => "pron",
        // The full name is accepted here just like for every other tag.
        "interjection" | "interj" | "int" => "int",
        "article" | "art" => "art",
        _ => return lower,
    };
    canonical.to_string()
}

/// Maps a tag to its canonical spelling.
///
/// The input is trimmed and ASCII-lowercased, then looked up in a fixed alias
/// table. Tags without an alias are returned in their trimmed, lowercased form.
pub fn normalize_tag(value: impl AsRef<str>) -> String {
    // (alias, canonical) pairs; aliases are compared after lowercasing.
    const ALIASES: &[(&str, &str)] = &[
        ("zk", "zk"),
        ("zhongkao", "zk"),
        ("中考", "zk"),
        ("gk", "gk"),
        ("gaokao", "gk"),
        ("高考", "gk"),
        ("ky", "kao_yan"),
        ("kaoyan", "kao_yan"),
        ("考研", "kao_yan"),
        ("cet-4", "cet4"),
        ("cet_4", "cet4"),
        ("cet-6", "cet6"),
        ("cet_6", "cet6"),
    ];

    let lowered = value.as_ref().trim().to_ascii_lowercase();
    let hit = ALIASES
        .iter()
        .find(|(alias, _)| *alias == lowered.as_str())
        .map(|(_, canonical)| *canonical);

    match hit {
        Some(canonical) => canonical.to_string(),
        None => lowered,
    }
}

/// Collapses every run of whitespace into a single space and strips leading
/// and trailing whitespace.
///
/// Returns an empty string when the input contains no non-whitespace text.
pub fn clean_text(value: impl AsRef<str>) -> String {
    let raw = value.as_ref();
    let mut cleaned = String::with_capacity(raw.len());

    for token in raw.split_whitespace() {
        // Separate tokens with one space; never emit a leading one.
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(token);
    }

    cleaned
}

/// Converts `value` into a lowercase, dash-separated slug.
///
/// Alphanumeric characters are kept and lowercased. This includes non-ASCII
/// letters and digits, so names written entirely in, say, Chinese keep
/// distinct slugs instead of all collapsing to the fallback. Whitespace and
/// the characters `_`, `-`, `.`, `/` act as separators: each run becomes a
/// single `-`, and separators at either end are dropped. All other characters
/// are removed. Returns `"source"` when nothing usable remains.
fn slugify(value: &str) -> String {
    let mut out = String::with_capacity(value.len());

    for ch in value.chars() {
        if ch.is_alphanumeric() {
            // `to_lowercase` may expand to several chars; for ASCII it yields
            // exactly the ASCII-lowercased character.
            out.extend(ch.to_lowercase());
        } else if ch.is_whitespace() || matches!(ch, '_' | '-' | '.' | '/') {
            // Skip leading separators and collapse runs into one dash.
            if !out.is_empty() && !out.ends_with('-') {
                out.push('-');
            }
        }
    }

    // At most one trailing dash can remain at this point.
    let kept = out.trim_end_matches('-').len();
    out.truncate(kept);

    if out.is_empty() {
        "source".to_string()
    } else {
        out
    }
}