favicon_scraper/
icon.rs

1use imagesize::ImageError;
2pub use imagesize::ImageSize;
3use reqwest::{Client, IntoUrl, Response};
4use url::Url;
5
6use crate::Error;
7
/// Identifies the discovery mechanism that produced a scraped icon.
///
/// The enum is marked `#[non_exhaustive]` because further discovery
/// mechanisms may be introduced later; downstream `match`es need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum IconKind {
    /// The icon was found by probing a fixed, well-known path such as `/favicon.ico`.
    HardcodedURL,
    /// The icon was referenced from the page's HTML, e.g. via `<link rel="icon">` or a similar tag.
    LinkedInHTML,
    /// The icon was listed in the Web App Manifest that the HTML points to
    /// through `<link rel="manifest">`.
    LinkedInManifest,
}
21
22/// A scraped icon.
23///
24/// To obtain, use [`crate::scrape`], [`crate::html::HTML::scan_html`], or [`crate::manifest::scan_manifest`].
25#[derive(Debug, Clone, Hash, PartialEq, Eq)]
26pub struct Icon {
27    /// Describes how the icon was discovered
28    pub kind: IconKind,
29    /// The source URL of the scraped icon, with redirects resolved
30    pub url: Url,
31    /// The size of the scraped icon, in pixels
32    pub size: ImageSize,
33}
34
35impl Icon {
36    /// This is a separate function because you can't break with a value
37    /// from `while let` loops (which is understandable)
38    async fn find_size(mut response: Response) -> Result<ImageSize, Error> {
39        let mut buffer = vec![];
40        while let Some(chunk) = response.chunk().await? {
41            buffer.extend_from_slice(&chunk);
42            match imagesize::blob_size(&buffer) {
43                Ok(size) => return Ok(size),
44                Err(ImageError::IoError(_)) => continue,
45                Err(_) => return Err(Error::UnsupportedImageFormat),
46            }
47        }
48        Err(Error::UnsupportedImageFormat)
49    }
50
51    /// Create an `Icon` from a URL by fetching it partially using the given client to get its size.
52    /// This used to be public but I don't want random users to deal with `kind`
53    pub(crate) async fn from_url(
54        client: &Client,
55        url: impl IntoUrl,
56        kind: IconKind,
57    ) -> Result<Self, Error> {
58        let response = client.get(url).send().await?;
59        let url = response.url().to_owned();
60        let size = Icon::find_size(response).await?;
61        Ok(Icon { kind, url, size })
62    }
63}
64
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetches Google's favicon over the network and prints its dimensions.
    /// Panics (failing the test) if the request or the size detection fails.
    #[tokio::test]
    async fn test_google() {
        let http = reqwest::Client::new();
        let target = "https://google.com/favicon.ico";

        let fetched = Icon::from_url(&http, target, IconKind::HardcodedURL).await;

        // Only the dimensions are of interest; the remaining fields are discarded.
        let Icon {
            size: ImageSize { width, height },
            ..
        } = fetched.unwrap();
        println!("The size of Google's favicon is {width}x{height} pixels.");
    }
}