use crate::{RobertError, UseSample, WordInfos};
use scraper::{Html, Selector};
/// Base URL of the Le Robert definition pages; the looked-up word is appended as the last
/// path segment.
const ROBERT_URL: &str = "http://dictionnaire.lerobert.com/definition";

/// `User-Agent` string handed to the HTTP client builder.
pub const USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0";
/// Returns the URL of the definition page for `word`.
///
/// The word is appended verbatim after a `/`; no escaping is applied here.
fn url_of(word: &str) -> String {
    [ROBERT_URL, word].join("/")
}
/// Downloads the definition page for `word` and parses it into an HTML document.
///
/// A fresh blocking HTTP client is built on every call, using [`USER_AGENT`] as its
/// user agent. The HTTP status code of the response is not inspected: whatever body
/// comes back is parsed.
///
/// # Errors
///
/// Returns the underlying [`reqwest::Error`] if the client cannot be built, the request
/// cannot be sent, or the response body cannot be read as text.
pub fn dom_for_word(word: &str) -> Result<Html, reqwest::Error> {
    let body = reqwest::blocking::ClientBuilder::default()
        .user_agent(USER_AGENT)
        .build()?
        .get(url_of(word))
        .send()?
        .text()?;

    Ok(Html::parse_document(&body))
}
/// Builds a [`Selector`] from a CSS selector expression.
///
/// # Panics
///
/// Panics if `Selector::parse` rejects the selector, because the result is unwrapped.
macro_rules! selector {
    ($css:expr) => {
        Selector::parse($css).unwrap()
    };
}
/// Extracts the definition texts from a parsed page.
///
/// Every `span` element whose `class` attribute is exactly `d_dfn` contributes one entry:
/// the concatenation of all text nodes it contains. Entries appear in the order the
/// selector yields them. A page without any matching element produces an empty vector.
///
/// # Errors
///
/// This function currently never returns `Err`; the `Result` is kept so the signature
/// stays unchanged for callers.
pub fn parse_def(dom: &Html) -> Result<Vec<String>, String> {
    let definition_selector = selector!(r#"span[class="d_dfn"]"#);

    let definitions = dom
        .select(&definition_selector)
        .map(|element| element.text().collect::<String>())
        .collect();

    Ok(definitions)
}
/// Extracts the synonym texts from a parsed page.
///
/// Every `span` element whose `class` attribute is exactly `s_syn` contributes one entry:
/// the concatenation of all text nodes it contains. Entries appear in the order the
/// selector yields them. A page without any matching element produces an empty vector.
///
/// # Errors
///
/// This function currently never returns `Err`; the `Result` is kept so the signature
/// stays unchanged for callers.
pub fn parse_synonyms(dom: &Html) -> Result<Vec<String>, String> {
    let synonym_selector = selector!(r#"span[class="s_syn"]"#);

    let synonyms = dom
        .select(&synonym_selector)
        .map(|element| element.text().collect::<String>())
        .collect();

    Ok(synonyms)
}
/// Extracts usage samples and their sources from a parsed page.
///
/// Sample texts are read from `div` elements whose `class` attribute is exactly
/// `ex_example`, and sources from `a` elements whose `class` attribute is exactly
/// `ex_author`. The two sequences are selected independently over the whole document
/// and paired by position.
///
/// # Errors
///
/// This function currently never returns `Err`; the `Result` is kept so the signature
/// stays unchanged for callers.
pub fn parse_samples(dom: &Html) -> Result<Vec<UseSample>, String> {
    let example_selector = selector!(r#"div[class="ex_example"]"#);
    let author_selector = selector!(r#"a[class="ex_author"]"#);

    // NOTE(review): `zip` stops at the shorter sequence and pairs purely by position, so
    // an example without an author link would shift every later source and drop trailing
    // examples. Confirm against the real markup whether each example always has an author.
    let samples = dom
        .select(&example_selector)
        .zip(dom.select(&author_selector))
        .map(|(example, author)| UseSample {
            sample: example.text().collect::<String>(),
            source: author.text().collect::<String>(),
        })
        .collect();

    Ok(samples)
}