le-robert 0.1.0

Get French word definitions, synonyms, and usage examples from the Le Robert dictionary
Documentation
use crate::{RobertError, UseSample, WordInfos};

use scraper::{Html, Selector};

/// Base URL of Le Robert's definition pages; `url_of` appends the looked-up word to it.
// NOTE(review): this is plain `http` — confirm whether the `https` endpoint should be used.
const ROBERT_URL: &str = "http://dictionnaire.lerobert.com/definition";

/// User-agent string sent by `dom_for_word` when requesting a page.
///
/// Le Robert doesn't accept Reqwest's default user agent, so a browser-like one is used instead.
/// Note that despite this restriction they **do not** prohibit scraping within their EULA.
pub const USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0";

fn url_of(word: &str) -> String {
    format!("{}/{}", ROBERT_URL, word)
}

/// Downloads the Le Robert page for `word` and parses it into an HTML document.
///
/// A blocking GET request is sent to the address produced by `url_of`, using
/// [`USER_AGENT`] as the user agent.
///
/// The HTTP status code is not inspected: whatever body the server returns is parsed.
///
/// # Errors
///
/// Returns the underlying [`reqwest::Error`] if the client cannot be built, the request
/// cannot be sent, or the response body cannot be read as text.
pub fn dom_for_word(word: &str) -> Result<Html, reqwest::Error> {
    let client = reqwest::blocking::Client::builder()
        .user_agent(USER_AGENT)
        .build()?;

    // `word` is already a `&str`, so it is passed through without re-borrowing.
    let body = client.get(url_of(word)).send()?.text()?;

    Ok(Html::parse_document(&body))
}

/// Builds a [`Selector`] from a CSS selector string.
///
/// # Panics
///
/// Panics if the string is not a valid CSS selector. The visible call sites pass string
/// literals, so a failure here is a programming error rather than a runtime condition.
macro_rules! selector {
    ($css:expr) => {
        Selector::parse($css).expect("CSS selector passed to `selector!` must be valid")
    };
}

/// Parses definitions of the word.
///
/// Collects the concatenated text of every `span` element whose `class` attribute is exactly
/// `d_dfn`, in the order the selector yields them. The vector is empty when nothing matches.
///
/// # Errors
///
/// This implementation never returns `Err`; the result is always `Ok`.
pub fn parse_def(dom: &Html) -> Result<Vec<String>, String> {
    let def_span = selector!(r#"span[class="d_dfn"]"#);

    Ok(dom
        .select(&def_span)
        .map(|def| def.text().collect::<String>())
        .collect())
}

/// Parses synonyms of the word.
///
/// Collects the concatenated text of every `span` element whose `class` attribute is exactly
/// `s_syn`, in the order the selector yields them. The vector is empty when nothing matches.
///
/// # Errors
///
/// This implementation never returns `Err`; the result is always `Ok`.
pub fn parse_synonyms(dom: &Html) -> Result<Vec<String>, String> {
    let syn_span = selector!(r#"span[class="s_syn"]"#);

    Ok(dom
        .select(&syn_span)
        .map(|syn| syn.text().collect::<String>())
        .collect())
}

/// Parses samples of the word.
///
/// Runs two whole-document queries — `div` elements whose `class` is exactly `ex_example`
/// and `a` elements whose `class` is exactly `ex_author` — and pairs their results by
/// position: the n-th example text becomes `sample`, the n-th author text becomes `source`.
///
/// # Errors
///
/// This implementation never returns `Err`; the result is always `Ok`.
// NOTE(review): because the pairing is positional and `zip` stops at the shorter sequence,
// an example without an author link would shift later sources and drop trailing samples.
// Confirm against the actual page markup.
pub fn parse_samples(dom: &Html) -> Result<Vec<UseSample>, String> {
    let sample_div = selector!(r#"div[class="ex_example"]"#);
    let author_link = selector!(r#"a[class="ex_author"]"#);

    let samples = dom
        .select(&sample_div)
        .zip(dom.select(&author_link))
        .map(|(sample, author)| UseSample {
            sample: sample.text().collect::<String>(),
            source: author.text().collect::<String>(),
        })
        .collect();

    Ok(samples)
}