charcoal-dict 0.2.7

A command line dictionary
use crate::word::{Acquire, QueryYoudict, Request, Select};
use crate::{ExactQuery, Question, SingleEntry};
use scraper::{ElementRef, Html, Selector};
use whatlang::Lang;

impl Acquire for QueryYoudict {
    type WordQuery = ExactQuery;
    type WordEntry = SingleEntry;

    /// Looks up `word_query` by fetching its page via [`Request::request`] and
    /// extracting a [`SingleEntry`] from the page's root element via
    /// [`Select::select`].
    ///
    /// # Errors
    /// Propagates any error returned by the request or the selection step.
    fn acquire(self, word_query: &ExactQuery) -> anyhow::Result<SingleEntry> {
        let page = self.request(word_query)?;
        let root = page.root_element();
        QueryYoudict::select(root, word_query)
    }
}

impl Request for QueryYoudict {
    type WordQuery = ExactQuery;

    /// Downloads the Youdao search page for `word_query` and parses it into an
    /// HTML document.
    ///
    /// The word is sent as the `q` query parameter of
    /// `http://dict.youdao.com/search`.
    ///
    /// # Errors
    /// Returns an error if the URL cannot be built, the HTTP request fails, or
    /// the response body cannot be read as text.
    fn request(self, word_query: &ExactQuery) -> anyhow::Result<Html> {
        async fn get_html(url: impl AsRef<str> + reqwest::IntoUrl) -> anyhow::Result<String> {
            let body = reqwest::get(url).await?.text().await?;
            Ok(body)
        }

        // Encode the word as a real query parameter. Splicing it into the URL
        // string verbatim lets reserved characters such as `&`, `#`, `+` or `%`
        // truncate or corrupt the search term.
        let word = word_query.word().to_string();
        let youdao_dict_url =
            url::Url::parse_with_params("http://dict.youdao.com/search", &[("q", word)])?;

        let html = futures::executor::block_on(async { get_html(youdao_dict_url).await })?;
        let doc = scraper::Html::parse_document(&html);

        Ok(doc)
    }
}

impl Select for QueryYoudict {
    type WordQuery = ExactQuery;
    type Target = SingleEntry;

    /// Builds a [`SingleEntry`] from the elements found under `elem`.
    ///
    /// * `pronunciation` — for every `span.pronounce` element, the first two
    ///   non-blank (trimmed) text fragments as a pair; elements with fewer than
    ///   two such fragments are skipped.
    /// * `brief` — for every `#phrsListTab .trans-container ul li` element, its
    ///   trimmed non-blank text fragments concatenated without a separator.
    /// * `variants` — for every text node of `#phrsListTab .trans-container p`,
    ///   the node split on newlines, each piece trimmed, blank pieces dropped
    ///   and the rest joined by single spaces; empty results are discarded.
    /// * `sentence` — whatever [`Sentence::select`] yields for the same element.
    /// * `authority` — always left empty here.
    ///
    /// # Errors
    /// Propagates an error from [`Sentence::select`].
    fn select(elem: ElementRef, word_query: &ExactQuery) -> anyhow::Result<Self::Target> {
        /// Collapses one text node into a single line: split on newlines, trim
        /// every piece, drop blank ones and join the rest with spaces.
        fn squash_lines(text: &str) -> String {
            let pieces: Vec<String> = text.split('\n').filter_map(trim_str).collect();
            pieces.join(" ")
        }

        let pronounce_sel = Selector::parse("span.pronounce").unwrap();
        let pronunciation = elem
            .select(&pronounce_sel)
            .filter_map(|span| {
                let mut fragments = span.text().filter_map(trim_str);
                let region = fragments.next()?;
                let pron = fragments.next()?;
                Some((region, pron))
            })
            .collect();

        let brief_sel = Selector::parse("#phrsListTab .trans-container ul li").unwrap();
        let brief = elem
            .select(&brief_sel)
            .map(|item| item.text().filter_map(trim_str).collect::<String>())
            .collect();

        let variants_sel = Selector::parse("#phrsListTab .trans-container p").unwrap();
        let variants = elem
            .select(&variants_sel)
            .flat_map(|para| para.text().map(squash_lines))
            .filter(|line| !line.is_empty())
            .collect();

        let sentence = Sentence::select(elem, word_query)?;

        Ok(SingleEntry {
            pronunciation,
            brief,
            variants,
            authority: Vec::new(),
            sentence,
        })
    }
}

/// Selector for the list of bilingual example sentences on a result page.
pub struct Sentence;

impl Select for Sentence {
    type WordQuery = ExactQuery;
    type Target = Vec<(String, String)>;

    /// Collects one `(original, translation)` pair per
    /// `#bilingual.trans-container li` element found under `elem`.
    ///
    /// Items for which `Sen::select` returns an error are skipped, so this
    /// function itself always returns `Ok`.
    fn select(elem: ElementRef, word_query: &ExactQuery) -> anyhow::Result<Self::Target> {
        let item_sel = Selector::parse("#bilingual.trans-container li").unwrap();

        let mut pairs = Vec::new();
        for item in elem.select(&item_sel) {
            if let Ok(pair) = Sen::select(item, word_query) {
                pairs.push(pair);
            }
        }
        Ok(pairs)
    }
}

struct Sen;
const PUNCTUATORS: &[char; 10] = &['.', ',', '\"', '\'', '?', '!', ':', '-', '<', '>'];

impl Select for Sen {
    type WordQuery = ExactQuery;
    type Target = (String, String);

    fn select(elem: ElementRef, word_query: &ExactQuery) -> anyhow::Result<Self::Target> {
        let sel = Selector::parse("p").unwrap();
        let mut iter = elem.select(&sel);

        let mut extract_to_vec = |msg| -> Vec<String> {
            iter.next()
                .expect(msg)
                .text()
                .filter_map(trim_str)
                .collect()
        };

        fn western_concat(vec: Vec<String>) -> String {
            let mut ori = String::new();
            let mut ori_iter = vec.into_iter();
            if let Some(s) = ori_iter.next() {
                ori.push_str(&s)
            }
            for mut s in ori_iter {
                if !s.starts_with(PUNCTUATORS) {
                    s.insert(0, ' ')
                }
                ori.push_str(&s)
            }
            ori
        }

        fn eastern_concat(vec: Vec<String>) -> String {
            vec.join("")
        }

        let ori_vec = extract_to_vec("No ori found in sentence");
        let trans_vec = extract_to_vec("No trans found in sentence");

        let (ori, trans) = if matches!(
            word_query.inferred_lang(),
            Lang::Cmn | Lang::Jpn | Lang::Kor
        ) {
            (western_concat(ori_vec), eastern_concat(trans_vec))
        } else {
            (eastern_concat(ori_vec), western_concat(trans_vec))
        };
        Ok((ori, trans))
    }
}

/// Trims surrounding whitespace from `t` and returns the result as an owned
/// string, or `None` when nothing but whitespace was present.
fn trim_str(t: &str) -> Option<String> {
    match t.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}