1use crate::{
2 icon::{Icon, IconKind},
3 Error,
4};
5use futures::future::join_all;
6use reqwest::{Client, IntoUrl};
7use scraper::{Html as SHTML, Selector};
8use url::Url;
9
/// CSS selector matching every `<link>` element whose `rel` token list
/// contains one of the icon relations handled by this module.
const ICON_SELECTOR: &str = concat!(
    "link[rel~='icon'], ",
    "link[rel~='apple-touch-icon'], ",
    "link[rel~='apple-touch-icon-precomposed']",
);

/// CSS selector matching `<link>` elements whose `rel` token list contains
/// `manifest`.
const MANIFEST_SELECTOR: &str = "link[rel~='manifest']";
13
14#[derive(Debug, Clone, Hash, PartialEq, Eq)]
18pub struct HTML {
19 pub icons: Vec<Icon>,
20 pub manifest: Option<Url>,
21}
22
23impl HTML {
24 fn get_urls_from_html<'s, 'h, 'u>(
25 selector: &'s Selector,
26 html: &'h SHTML,
27 url: &'u Url,
28 ) -> impl Iterator<Item = Url> + use<'s, 'h, 'u> {
29 html.select(selector)
30 .filter_map(|e| e.attr("href"))
31 .filter_map(|u| url.join(u).ok())
32 }
33
34 fn parse_html(text: String, url: Url) -> (Option<Url>, Vec<Url>) {
35 let html = SHTML::parse_document(&text);
36
37 let icon_selector = Selector::parse(ICON_SELECTOR).unwrap();
38 let manifest_selector = Selector::parse(MANIFEST_SELECTOR).unwrap();
39
40 let manifest = HTML::get_urls_from_html(&manifest_selector, &html, &url).next();
41 (
42 manifest,
43 HTML::get_urls_from_html(&icon_selector, &html, &url).collect(),
44 )
45 }
46
47 pub async fn scan_html(client: &Client, url: impl IntoUrl) -> Result<Self, Error> {
49 let response = client.get(url).send().await?;
50 let url = response.url().to_owned(); let text = response.text().await?;
52
53 let (manifest, icons) = HTML::parse_html(text, url);
54
55 let icons = icons
56 .into_iter()
57 .map(|u| Icon::from_url(client, u, IconKind::LinkedInHTML));
58 let icons: Vec<Icon> = join_all(icons)
59 .await
60 .into_iter()
61 .filter_map(|i| i.ok())
62 .collect();
63
64 Ok(HTML { icons, manifest })
65 }
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Both selector constants must be accepted by `Selector::parse`, because
    /// `parse_html` relies on them parsing successfully.
    #[test]
    fn selectors_must_parse() {
        let cases = [
            (ICON_SELECTOR, "Icon selector didn't parse"),
            (MANIFEST_SELECTOR, "Manifest selector didn't parse"),
        ];

        for (css, failure) in cases {
            Selector::parse(css).expect(failure);
        }
    }
}