sc_scraping/
parse_web_bio.rs1use log::error;
2use scraper::{ElementRef, Selector};
3
4use crate::error::ScScrapingError;
5
/// Website link and biography text scraped from a profile page.
///
/// The standard traits are derived so the value can be logged, cloned,
/// compared in assertions and default-constructed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub(crate) struct WebBio {
    /// Inner HTML of the first `p.website > a` element, or `None` when the
    /// page has no such element.
    pub website: Option<String>,
    /// Trimmed, non-empty text nodes found directly under
    /// `div.entry.bio > div.value`, in document order.
    pub bio: Vec<String>,
}
10
11pub(crate) fn parse_web_bio<'a>(element: &ElementRef) -> Result<WebBio, ScScrapingError<'a>> {
12 let selector = Selector::parse("div.profile-content > div.right-col").unwrap();
13
14 let fragment = element.select(&selector).next().ok_or_else(|| {
15 let message = "Web and bio parent could not be found";
16 error!("{message}");
17 ScScrapingError::citizen(message)
18 })?;
19
20 let web_bio = WebBio {
21 website: parse_website(&fragment),
22 bio: parse_bio(&fragment),
23 };
24
25 Ok(web_bio)
26}
27
28fn parse_website(element: &ElementRef) -> Option<String> {
29 let selector = Selector::parse("p.website > a").unwrap();
30
31 element.select(&selector).map(|w| w.inner_html()).next()
32}
33
34fn parse_bio(element: &ElementRef) -> Vec<String> {
35 let selector = Selector::parse("div.entry.bio > div.value").unwrap();
36
37 element
38 .select(&selector)
39 .flat_map(|e| e.children())
40 .filter_map(|e| e.value().as_text())
41 .map(|t| t.trim())
42 .filter(|t| !t.is_empty())
43 .map(|t| t.to_string())
44 .collect::<Vec<_>>()
45}