use super::MAX_SIZE;
use super::error::FavIconError;
use super::icon_info::IconInfo;
use super::scraped_icon::ScrapedIcon;
use kuchikikiki::{NodeRef, traits::*};
use reqwest::Client;
use reqwest::header::{CONTENT_TYPE, ETAG};
use std::str;
use url::Url;
/// Collects the favicon candidates advertised by a web page and downloads the
/// most suitable one on request.
#[derive(Debug)]
pub struct IconScraper {
/// Candidate icons gathered by `from_http`; `fetch_best` sorts this list in
/// place before choosing which candidate to download.
icon_infos: Vec<IconInfo>,
}
impl IconScraper {
    /// Downloads the page at `url` and records every icon candidate it declares.
    ///
    /// Candidates are taken from `<link>` elements whose `rel` attribute is exactly
    /// `apple-touch-icon`, `icon` or `shortcut icon` (in that order). A conventional
    /// `/favicon.ico` at the root of the site is appended as a last candidate with
    /// no size information.
    ///
    /// # Errors
    ///
    /// Returns a [`FavIconError`] when the request cannot be sent or the response
    /// body cannot be read as text. Both failures are also logged as warnings.
    pub async fn from_http(url: &Url, client: &Client) -> Result<Self, FavIconError> {
        let result = client.get(url.as_str()).send().await.inspect_err(|error| {
            tracing::warn!(%url, %error, "Failed to download");
        })?;
        let html = result.text().await.inspect_err(|error| {
            tracing::warn!(%url, %error, "Failed to download url");
        })?;
        let document = kuchikikiki::parse_html().one(html);

        let mut icon_infos = Vec::new();
        for selector in [
            "link[rel=\"apple-touch-icon\"]",
            "link[rel=\"icon\"]",
            "link[rel=\"shortcut icon\"]",
        ] {
            icon_infos.extend(Self::select(&document, url, selector));
        }

        // Strip path and query so the fallback resolves against the site root.
        let mut base_url = url.clone();
        base_url.set_path("");
        base_url.set_query(None);
        if let Ok(basic_favicon_url) = base_url.join("favicon.ico") {
            icon_infos.push(IconInfo::new(basic_favicon_url, None));
        }

        Ok(IconScraper { icon_infos })
    }

    /// Tries to download the candidate that best fits `prefered_size`.
    ///
    /// The candidates are sorted first, then at most one download is attempted per
    /// tier, stopping at the first success:
    ///
    /// 1. the first candidate with one dimension equal to `prefered_size` and the
    ///    other not exceeding it,
    /// 2. the first candidate with more pixels than `prefered_size²` but no more
    ///    than `MAX_SIZE²`,
    /// 3. the last candidate with a known, non-zero pixel count below `prefered_size²`,
    /// 4. the first candidate in the sorted list.
    ///
    /// NOTE(review): tiers 2 and 3 presumably rely on `IconInfo`'s ordering being
    /// ascending by size, so that "first bigger" and "last smaller" are the closest
    /// matches — confirm against the `Ord` implementation.
    ///
    /// Returns `None` when no attempted download yields an image.
    pub async fn fetch_best(&mut self, client: &Client, prefered_size: u32) -> Option<ScrapedIcon> {
        self.icon_infos.sort_unstable();

        let exact_fit = self.icon_infos.iter().find(|info| {
            info.size.as_ref().is_some_and(|size| {
                (size.width == prefered_size && size.height <= prefered_size)
                    || (size.height == prefered_size && size.width <= prefered_size)
            })
        });
        if let Some(icon) = Self::download_icon(exact_fit.cloned(), client).await {
            return Some(icon);
        }

        // Saturate instead of overflowing for absurdly large requested sizes.
        let ideal_pixel_count = prefered_size.saturating_mul(prefered_size);
        let max_pixel_count = MAX_SIZE * MAX_SIZE;
        // Candidates without size information count as zero pixels.
        let pixel_count = |info: &IconInfo| info.size.map_or(0, |size| size.total_pixels());

        let next_bigger = self.icon_infos.iter().find(|&info| {
            let total_pixels = pixel_count(info);
            total_pixels > ideal_pixel_count && total_pixels <= max_pixel_count
        });
        if let Some(icon) = Self::download_icon(next_bigger.cloned(), client).await {
            return Some(icon);
        }

        // Search from the back: the last match in sorted order is wanted.
        let next_smaller = self.icon_infos.iter().rfind(|&info| {
            let total_pixels = pixel_count(info);
            total_pixels < ideal_pixel_count && total_pixels > 0
        });
        if let Some(icon) = Self::download_icon(next_smaller.cloned(), client).await {
            return Some(icon);
        }

        Self::download_icon(self.icon_infos.first().cloned(), client).await
    }

    /// Downloads the icon described by `info`.
    ///
    /// Returns `None` when `info` is `None`, the request fails, the response has no
    /// `Content-Type` starting with `image`, or the body cannot be read. The `ETag`
    /// and `Content-Type` header values are kept alongside the downloaded bytes.
    async fn download_icon(info: Option<IconInfo>, client: &Client) -> Option<ScrapedIcon> {
        let info = info?;
        let response = client.get(info.url.as_str()).send().await.ok()?;

        let (etag, mime) = {
            let headers = response.headers();
            let etag = headers
                .get(ETAG)
                .and_then(|value| value.to_str().ok())
                .map(ToOwned::to_owned);
            let mime = headers
                .get(CONTENT_TYPE)
                .and_then(|value| value.to_str().ok())
                .map(ToOwned::to_owned);
            (etag, mime)
        };

        // Decide from the headers alone, so a non-image body is never downloaded.
        let is_image_mime = mime.as_deref().is_some_and(|mime| mime.starts_with("image"));
        if !is_image_mime {
            return None;
        }

        let data = response.bytes().await.ok()?.to_vec();
        Some(ScrapedIcon {
            info,
            mime,
            etag,
            data,
        })
    }

    /// Collects an [`IconInfo`] for every element in `document` matching `selector`.
    ///
    /// Each element's `href` is resolved against `base`; elements without an `href`,
    /// or whose `href` cannot be resolved, are skipped. The raw `sizes` attribute
    /// (if any) is handed to `IconInfo::new`. An invalid selector yields an empty list.
    fn select(document: &NodeRef, base: &Url, selector: &str) -> Vec<IconInfo> {
        let Ok(nodes) = document.select(selector) else {
            return Vec::new();
        };

        nodes
            .filter_map(|node| {
                let attrs = node.attributes.borrow();
                let url = base.join(attrs.get("href")?).ok()?;
                let icon_info = IconInfo::new(url, attrs.get("sizes"));
                Some(icon_info)
            })
            .collect()
    }
}