news-flash 3.0.0

Base library for a modern feed reader
Documentation
use super::MAX_SIZE;
use super::error::FavIconError;
use super::icon_info::IconInfo;
use super::scraped_icon::ScrapedIcon;
use kuchikikiki::{NodeRef, traits::*};
use reqwest::Client;
use reqwest::header::{CONTENT_TYPE, ETAG};
use std::str;
use url::Url;

/// Gathers the icon candidates referenced by a web page and downloads the one
/// that best matches a requested size.
#[derive(Debug)]
pub struct IconScraper {
    /// Icon candidates collected by `from_http`; `fetch_best` sorts this list
    /// in place before picking a candidate.
    icon_infos: Vec<IconInfo>,
}

impl IconScraper {
    /// Downloads the page at `url` and collects every icon candidate it advertises.
    ///
    /// Candidates are gathered from `<link>` elements whose `rel` is
    /// `apple-touch-icon`, `icon` or `shortcut icon` (in that order), followed by
    /// the conventional `/favicon.ico` at the root of the site.
    ///
    /// Relative `href`s and the `/favicon.ico` fallback are resolved against the
    /// URL the response was finally served from, so that redirects (for example
    /// `http` -> `https` or a bare domain -> `www.`) do not yield icon URLs that
    /// point at the pre-redirect location.
    ///
    /// # Errors
    ///
    /// Returns an error if the request fails or the response body cannot be
    /// read as text.
    pub async fn from_http(url: &Url, client: &Client) -> Result<Self, FavIconError> {
        let response = client.get(url.as_str()).send().await.inspect_err(|error| {
            tracing::warn!(%url, %error, "Failed to download");
        })?;

        // Must be captured before `text()` consumes the response.
        let document_url = response.url().clone();

        let html = response.text().await.inspect_err(|error| {
            tracing::warn!(%url, %error, "Failed to download url");
        })?;

        let document = kuchikikiki::parse_html().one(html);

        let mut icon_infos = Vec::new();
        for selector in [
            "link[rel=\"apple-touch-icon\"]",
            "link[rel=\"icon\"]",
            "link[rel=\"shortcut icon\"]",
        ] {
            icon_infos.extend(Self::select(&document, &document_url, selector));
        }

        // don't forget about good old /favicon.ico
        let mut base_url = document_url;
        base_url.set_path("");
        base_url.set_query(None);
        if let Ok(basic_favicon_url) = base_url.join("favicon.ico") {
            icon_infos.push(IconInfo::new(basic_favicon_url, None));
        }

        Ok(IconScraper { icon_infos })
    }

    /// Downloads the icon candidate that best matches `prefered_size` (in pixels).
    ///
    /// The candidates are sorted in place, then tried in this order; the first
    /// successful download is returned:
    ///
    /// 1. a candidate with one side equal to `prefered_size` and the other side
    ///    not larger than it,
    /// 2. the first candidate with more pixels than `prefered_size²` but at most
    ///    `MAX_SIZE²`,
    /// 3. the last candidate with fewer pixels than `prefered_size²` (candidates
    ///    without a known size are skipped),
    /// 4. the first candidate in the sorted list.
    ///
    /// Only one candidate per step is attempted. Returns `None` if none of the
    /// attempted downloads yields an image.
    ///
    /// NOTE(review): steps 2 and 3 pick the "next larger" / "next smaller" icon
    /// only if `IconInfo`'s ordering sorts by ascending size — confirm against
    /// its `Ord` implementation.
    pub async fn fetch_best(&mut self, client: &Client, prefered_size: u32) -> Option<ScrapedIcon> {
        self.icon_infos.sort_unstable();

        // see if there is an icon with the prefered width x height in px
        let exact_match = self.icon_infos.iter().find(|info| {
            info.size.as_ref().is_some_and(|size| {
                (size.width == prefered_size && size.height <= prefered_size)
                    || (size.height == prefered_size && size.width <= prefered_size)
            })
        });

        if let Some(icon) = Self::download_icon(exact_match.cloned(), client).await {
            return Some(icon);
        }

        // Saturate instead of overflowing: a huge requested size must neither
        // panic (debug builds) nor wrap around to a small pixel count (release).
        let ideal_pixel_count = prefered_size.saturating_mul(prefered_size);
        let max_pixel_count = MAX_SIZE * MAX_SIZE;

        // find the next larger icon but limit max size
        let next_bigger = self.icon_infos.iter().find(|info| {
            let total_pixels = info.size.map(|size| size.total_pixels()).unwrap_or(0);
            total_pixels > ideal_pixel_count && total_pixels <= max_pixel_count
        });

        if let Some(icon) = Self::download_icon(next_bigger.cloned(), client).await {
            return Some(icon);
        }

        // find the next smaller icon: the last match in sort order
        let next_smaller = self.icon_infos.iter().rev().find(|info| {
            let total_pixels = info.size.map(|size| size.total_pixels()).unwrap_or(0);
            total_pixels < ideal_pixel_count && total_pixels > 0
        });

        if let Some(icon) = Self::download_icon(next_smaller.cloned(), client).await {
            return Some(icon);
        }

        // just take the first icon
        Self::download_icon(self.icon_infos.first().cloned(), client).await
    }

    /// Downloads the icon described by `info`.
    ///
    /// Returns `None` when `info` is `None`, the request fails, the server
    /// answers with a non-success status, the `Content-Type` header is missing
    /// or does not start with `image`, or the body cannot be read.
    ///
    /// On success the returned `ScrapedIcon` carries the raw bytes together with
    /// the response's `Content-Type` and `ETag` header values (when present and
    /// valid header strings).
    async fn download_icon(info: Option<IconInfo>, client: &Client) -> Option<ScrapedIcon> {
        let info = info?;

        let response = client.get(info.url.as_str()).send().await.ok()?;

        // An error response is not an icon, even if it happens to be served
        // with an image content type (e.g. a "not found" placeholder picture).
        if !response.status().is_success() {
            return None;
        }

        let headers = response.headers();
        let etag = headers
            .get(ETAG)
            .and_then(|etag| etag.to_str().ok())
            .map(ToOwned::to_owned);
        let mime = headers
            .get(CONTENT_TYPE)
            .and_then(|content_type| content_type.to_str().ok())
            .map(ToOwned::to_owned);

        // Reject non-images before reading the body so that HTML error pages
        // and other payloads are never downloaded just to be thrown away.
        if !mime.as_deref().is_some_and(|mime| mime.starts_with("image")) {
            return None;
        }

        let data = response.bytes().await.ok()?.to_vec();

        Some(ScrapedIcon { info, mime, etag, data })
    }

    /// Collects an `IconInfo` for every element of `document` matching `selector`.
    ///
    /// Each element's `href` attribute is resolved against `base`; elements
    /// without an `href`, or whose `href` cannot be joined onto `base`, are
    /// skipped. The raw `sizes` attribute (if any) is handed to `IconInfo::new`.
    ///
    /// Returns an empty list if the selector cannot be applied to the document.
    fn select(document: &NodeRef, base: &Url, selector: &str) -> Vec<IconInfo> {
        let mut icons = Vec::new();

        let Ok(nodes) = document.select(selector) else {
            return icons;
        };

        for node in nodes {
            let attrs = node.attributes.borrow();

            // `Url::join` only borrows the base, so no per-node clone is needed.
            let Some(url) = attrs.get("href").and_then(|href| base.join(href).ok()) else {
                continue;
            };

            icons.push(IconInfo::new(url, attrs.get("sizes")));
        }

        icons
    }
}