clitrans 0.4.1

Yet another command-line translator
#[cfg(test)]
mod test;

use std::sync::LazyLock;

use super::*;
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use url::Url;

/// Translation engine whose [`Translate`] implementation scrapes the Bing
/// dictionary result page at `cn.bing.com`.
///
/// The type carries no state; it exists only to select this engine.
#[derive(Clone)]
pub struct Translator;

impl Translate for Translator {
    fn translate(&self, input: &str) -> Result<Option<Translation>> {
        let url: Url = format!("https://cn.bing.com/dict/search?q={input}&mkt=zh-cn").parse()?;
        let resp = ureq::request_url("GET", &url)
            .set("Accept-Encoding", "gzip")
            .set("Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7")
            .call()?
            .into_string()?;
        Ok(parse(url, &resp))
    }
}

/// Parses a Bing dictionary result page into a [`Translation`].
///
/// `url` is the address the page was fetched from; it is stored in the
/// returned translation. `body` is the HTML text of the page.
///
/// Returns `None` when the expected result container (`.lf_area`) or the
/// headword element (`#headword`) cannot be found, i.e. when the page does not
/// look like a dictionary hit. The page comes from a remote server, so a
/// missing element is treated as "no result" rather than a panic.
fn parse(url: Url, body: &str) -> Option<Translation> {
    let root = Html::parse_document(body);
    let content = get_element(
        &root,
        r#"
            body
            .contentPadding
            .b_cards
            .b_cards
            .lf_area
        "#,
    )?;
    // The first text found under the headword element is the query as Bing
    // normalised it.
    let query = get_text(content, ".qdef .hd_area #headword")
        .into_iter()
        .next()?;
    let prons = parse_pronounciations(content);
    let exps = parse_explanation(content);
    Some(
        Translation::new(query, url.to_string())
            .pronunciations(prons)
            .explanations(exps),
    )
}

/// Extracts pronunciations from the `.hd_p1_1` element below `detail`.
///
/// Two layouts are handled:
///
/// * `.hd_p1_1` has exactly one child node: its text is searched for a
///   bracketed value (`[...]`), which is reported as pinyin.
/// * otherwise the `div`s below `.hd_p1_1` are consumed in pairs. The first
///   `div` of a pair carries the `US [...]` / `UK [...]` text; the `onclick`
///   attribute of the first child of the second `div` is searched for an mp3
///   URL, which becomes the audio link.
///
/// Returns an empty vector when `.hd_p1_1` is absent or nothing matches.
fn parse_pronounciations(detail: ElementRef) -> Vec<Pronunciation> {
    static RE_PY: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[(.*?)]").unwrap());
    static RE_US: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"US\s*\[(.*?)]").unwrap());
    static RE_UK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"UK\s*\[(.*?)]").unwrap());
    // The dot before `mp3` is escaped so the lazy match only stops at a literal
    // `.mp3` suffix and not at any `<char>mp3` sequence earlier in the URL.
    static RE_MP3: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"https?://.*?\.mp3").unwrap());

    let mut prons = vec![];
    let selector = Selector::parse(".hd_p1_1").unwrap();
    let Some(node) = detail.select(&selector).next() else {
        return prons;
    };

    if node.children().count() == 1 {
        let pron: String = node.text().collect();
        if let Some(caps) = RE_PY.captures(&pron) {
            prons.push(Pronunciation::pinyin(caps[1].to_owned()));
        }
        return prons;
    }

    let selector = Selector::parse(".hd_p1_1 div").unwrap();
    let mut it = detail.select(&selector);
    // Each iteration consumes two `div`s: the phonetic text, then its audio
    // button. A plain `for` loop cannot be used because of the inner `next()`.
    while let Some(div) = it.next() {
        let pron: String = div.text().collect();
        // A missing sibling, a non-element first child (e.g. a text node), a
        // missing `onclick` or a non-matching handler all yield `None` instead
        // of panicking on unexpected markup.
        let audio = it
            .next()
            .and_then(|div| div.children().next())
            .and_then(|a| a.value().as_element())
            .and_then(|el| el.attr("onclick"))
            .and_then(|s| RE_MP3.find(s))
            .map(|m| m.as_str().to_owned());
        if let Some(caps) = RE_US.captures(&pron) {
            prons.push(Pronunciation::us(caps[1].to_owned()).audio(audio));
        } else if let Some(caps) = RE_UK.captures(&pron) {
            prons.push(Pronunciation::uk(caps[1].to_owned()).audio(audio));
        }
    }
    prons
}

/// Extracts the explanations listed under `.qdef ul li` below `detail`.
///
/// For every list item the text of its `.pos` element becomes the tag (the
/// literal `网络` maps to [`ExpTag::Web`], anything else to [`ExpTag::Pos`])
/// and the text of its `.def` element is split on semicolons into trimmed
/// items.
///
/// List items that lack a `.pos` or `.def` element are skipped: the markup
/// comes from a remote page, so a malformed entry must not abort the lookup.
fn parse_explanation(detail: ElementRef) -> Vec<Explanation> {
    let s_li = Selector::parse(".qdef ul li").unwrap();
    let s_pos = Selector::parse(".pos").unwrap();
    let s_def = Selector::parse(".def").unwrap();
    let mut exps = vec![];
    for li in detail.select(&s_li) {
        let (Some(pos), Some(def)) = (li.select(&s_pos).next(), li.select(&s_def).next()) else {
            continue;
        };
        let pos: String = pos.text().collect();
        let def: String = def.text().collect();
        let tag = match pos.trim() {
            "网络" => ExpTag::Web,
            s => ExpTag::Pos(s.to_owned()),
        };
        // Definitions are separated by the fullwidth semicolon `；` (written as
        // an escape so it survives encoding-unaware tooling) or by an ASCII `;`.
        let items = def
            .split(&['\u{FF1B}', ';'][..])
            .map(|v| v.trim().to_owned())
            .collect();
        exps.push(Explanation { tag, items });
    }
    exps
}