sc_scraping/
parse_web_bio.rs

1use log::error;
2use scraper::{ElementRef, Selector};
3
4use crate::error::ScScrapingError;
5
/// Website link and biography text extracted from a profile page fragment.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub(crate) struct WebBio {
    /// Inner HTML of the website anchor, or `None` when no anchor matched.
    pub website: Option<String>,
    /// Trimmed, non-empty text fragments of the biography, in document order.
    pub bio: Vec<String>,
}
10
11pub(crate) fn parse_web_bio<'a>(element: &ElementRef) -> Result<WebBio, ScScrapingError<'a>> {
12    let selector = Selector::parse("div.profile-content > div.right-col").unwrap();
13
14    let fragment = element.select(&selector).next().ok_or_else(|| {
15        let message = "Web and bio parent could not be found";
16        error!("{message}");
17        ScScrapingError::citizen(message)
18    })?;
19
20    let web_bio = WebBio {
21        website: parse_website(&fragment),
22        bio: parse_bio(&fragment),
23    };
24
25    Ok(web_bio)
26}
27
28fn parse_website(element: &ElementRef) -> Option<String> {
29    let selector = Selector::parse("p.website > a").unwrap();
30
31    element.select(&selector).map(|w| w.inner_html()).next()
32}
33
34fn parse_bio(element: &ElementRef) -> Vec<String> {
35    let selector = Selector::parse("div.entry.bio > div.value").unwrap();
36
37    element
38        .select(&selector)
39        .flat_map(|e| e.children())
40        .filter_map(|e| e.value().as_text())
41        .map(|t| t.trim())
42        .filter(|t| !t.is_empty())
43        .map(|t| t.to_string())
44        .collect::<Vec<_>>()
45}