#![deny(missing_docs)]
use scraper::Selector;
use std::sync::OnceLock;
/// Language label attached to a dictionary entry.
///
/// [`search`] derives this from the text of a result card's title element
/// by looking at how that text begins.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Lang {
    /// The card title starts with `한국`.
    Korean,
    /// The card title starts with `영`.
    English,
    /// The card title starts with `일`.
    Japanese,
    /// The card title starts with `한자`.
    Hanja,
    /// Any other card title; the payload is the title text as scraped.
    Other(String),
}
/// A single dictionary entry scraped from a search result card.
#[derive(Debug, Clone)]
pub struct Word {
    /// Headword text of the entry.
    pub word: String,
    /// Meanings listed for the entry, one string per scraped meaning element.
    pub meaning: Vec<String>,
    /// Pronunciation text, when the entry provides one.
    pub pronounce: Option<String>,
    /// Language of the card this entry was found in.
    pub lang: Lang,
}
/// Renders the entry on one line as
/// `[(label) ]word [pronounce ]meaning1, meaning2, ...`.
///
/// The parenthesised label is only emitted for [`Lang::Other`], and the
/// pronunciation only when it is present.
impl std::fmt::Display for Word {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only unrecognised languages carry a label worth showing.
        if let Lang::Other(label) = &self.lang {
            write!(f, "({label}) ")?;
        }

        write!(f, "{} ", self.word)?;

        if let Some(reading) = &self.pronounce {
            write!(f, "{reading} ")?;
        }

        f.write_str(&self.meaning.join(", "))
    }
}
/// Outcome of a dictionary lookup performed by [`search`].
#[derive(Clone, Debug)]
pub struct Search {
    /// Entries found on the result page, at most one per result card.
    pub words: Vec<Word>,
    /// Text of every `.link_speller` link on the result page
    /// (presumably the site's spelling suggestions for the query).
    pub alternatives: Vec<String>,
}
#[derive(thiserror::Error, Debug)]
pub enum DaumdicError {
#[error("empty word was given")]
EmptyWord,
#[error("HTTP GET request failed")]
RequestFailed(#[from] reqwest::Error),
}
/// CSS selectors used to pick apart a search result page.
///
/// Grouped in one struct so they can be parsed once and cached together.
struct SelectorSet {
    /// Matches one result card.
    card: Selector,
    /// Matches an entry inside a card.
    item: Selector,
    /// Matches the headword inside an entry.
    word: Selector,
    /// Matches the card title that names the language.
    lang: Selector,
    /// Matches the pronunciation inside an entry.
    pronounce: Selector,
    /// Matches each meaning inside an entry.
    meaning: Selector,
    /// Matches the alternative-query links on the page.
    alternatives: Selector,
}
pub async fn search(word: &str) -> Result<Search, DaumdicError> {
if word.is_empty() {
return Err(DaumdicError::EmptyWord);
}
let client = reqwest::Client::new();
let url = format!("https://dic.daum.net/search.do?q={}", word);
let resp = client.get(&url).send().await?;
let body = resp.text().await?;
let document = scraper::Html::parse_document(&body);
static SELECTOR_CACHE: OnceLock<SelectorSet> = OnceLock::new();
let selector = SELECTOR_CACHE.get_or_init(|| SelectorSet {
card: Selector::parse(".card_word").unwrap(),
item: Selector::parse(".cleanword_type,.search_type").unwrap(),
word: Selector::parse(".txt_cleansch,.txt_searchword,.txt_hanjaword").unwrap(),
lang: Selector::parse(".tit_word").unwrap(),
pronounce: Selector::parse(".sub_read,.txt_pronounce").unwrap(),
meaning: Selector::parse(".txt_search").unwrap(),
alternatives: Selector::parse(".link_speller").unwrap(),
});
let words = document
.select(&selector.card)
.filter_map(|card| {
let lang = card
.select(&selector.lang)
.map(|element| element.text().collect::<Vec<_>>().join(""))
.map(|lang| {
if lang.starts_with("한국") {
Lang::Korean
} else if lang.starts_with('영') {
Lang::English
} else if lang.starts_with('일') {
Lang::Japanese
} else if lang.starts_with("한자") {
Lang::Hanja
} else {
Lang::Other(lang.to_string())
}
})
.next();
card.select(&selector.item)
.map(|item| {
let word = item
.select(&selector.word)
.map(|element| element.text().collect::<Vec<_>>().join(""))
.next();
let pronounce = item
.select(&selector.pronounce)
.map(|element| element.text().collect::<Vec<_>>().join(""))
.next();
let meaning = item
.select(&selector.meaning)
.map(|element| element.text().collect::<Vec<_>>().join(""))
.collect::<Vec<_>>();
(word, lang.clone(), pronounce, meaning)
})
.filter_map(|t| match t {
(Some(word), Some(lang), pronounce, meaning) => {
Some((word, lang, pronounce, meaning))
}
_ => None,
})
.map(|(word, lang, pronounce, meaning)| Word {
word,
lang,
pronounce,
meaning,
})
.next()
})
.collect();
let alternatives = document
.select(&selector.alternatives)
.map(|element| element.text().collect::<Vec<_>>().join(""))
.collect();
Ok(Search {
words,
alternatives,
})
}