mod error;
mod icon_info;
mod scraped_icon;
mod scraper;
pub use self::error::FavIconError;
use self::scraper::IconScraper;
use crate::database::DatabaseExt;
use crate::feed_api::FeedApi;
use crate::models::{FatFavIcon, FavIcon, Feed, FeedID, Url};
use chrono::{DateTime, Duration, Utc};
use image::codecs::png::PngEncoder;
use image::{GenericImageView, ImageEncoder, ImageReader, imageops};
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest::{self, Client};
use std::io::Cursor;
use std::sync::Arc;
use tokio::sync::{RwLock, Semaphore};
/// Maximum edge length, in pixels, of the low-resolution icon rendition.
/// Also passed to the scraper as the target size for the low-resolution icon.
pub const LOW_RES: u32 = 32;
/// Target size passed to the scraper when looking for a high-resolution icon.
pub const HIGH_RES: u32 = 144;
// NOTE(review): not referenced in this file; presumably an upper bound on
// accepted icon dimensions — confirm against the users of this constant.
pub const MAX_SIZE: u32 = 512;
/// Number of days added to the current time to compute a new icon's `expires` timestamp.
pub const EXPIRES_AFTER_DAYS: i64 = 30;
/// Serves feed favicons from the database and fetches new ones when the stored
/// icon is missing or expired.
pub struct FavIconLoader {
/// Database handle used to read feeds and to read and insert favicons.
db: Arc<Box<dyn DatabaseExt>>,
/// Semaphore from which a permit is acquired before a new icon is fetched.
download_semaphore: Arc<Semaphore>,
}
impl FavIconLoader {
pub fn new(db: &Arc<Box<dyn DatabaseExt>>, download_semaphore: &Arc<Semaphore>) -> Self {
FavIconLoader {
db: db.clone(),
download_semaphore: download_semaphore.clone(),
}
}
/// Returns the favicon for `feed_id`.
///
/// An icon found in the database is returned directly as long as it has not
/// expired. In every other case (nothing stored, the read failed, or the
/// stored icon is expired) a new icon is fetched and written; an expired icon
/// is handed along as the "old" icon for the download step.
///
/// # Errors
/// Propagates any error of the fetch-and-store step.
pub async fn get_icon(
    &self,
    feed_id: &FeedID,
    api: &RwLock<Box<dyn FeedApi>>,
    client: &Client,
    header: HeaderMap<HeaderValue>,
) -> Result<FavIcon, FavIconError> {
    // A failed database read is treated the same as "no icon stored".
    let stored = self.db.read_favicon(feed_id).ok();
    match stored {
        Some(cached) if !cached.is_expired() => Ok(cached),
        stale => {
            Self::get_and_write_new_icon(&self.db, api, &self.download_semaphore, feed_id, stale.as_ref(), client, header).await
        }
    }
}
async fn get_and_write_new_icon(
db: &Arc<Box<dyn DatabaseExt>>,
api: &RwLock<Box<dyn FeedApi>>,
download_semaphore: &Arc<Semaphore>,
feed_id: &FeedID,
old_icon: Option<&FavIcon>,
client: &Client,
header: HeaderMap<HeaderValue>,
) -> Result<FavIcon, FavIconError> {
tracing::debug!(?feed_id, "Downloading new icon");
let feeds = db.read_feeds()?;
let feed = feeds.iter().find(|f| &f.feed_id == feed_id).ok_or(FavIconError::NoFeed)?;
let permit = download_semaphore.acquire().await?;
let favicon = Self::fetch_new_icon(Some(api), feed, client, header, old_icon).await;
drop(permit);
let favicon = match Self::resize_icon(favicon.clone()) {
Ok(resized_icon) => {
db.insert_favicon(&resized_icon).map_err(|error| {
tracing::error!(%feed_id, %error, "Failed to write favicon to db");
FavIconError::DB(error)
})?;
resized_icon
}
Err(error) => {
tracing::debug!(%error, "resize of icon failed");
favicon
}
};
Ok(FavIcon::from(favicon))
}
/// Produces a low-resolution PNG rendition for an icon that has only one
/// rendition so far.
///
/// * Icons that already have both renditions, and SVG icons, are returned
///   unchanged.
/// * If the declared format is neither PNG nor JPEG, the decoded image is
///   re-encoded as PNG, stored in `lowres`, and `format` becomes `image/png`.
/// * Images that already fit into `LOW_RES` x `LOW_RES` are not scaled.
/// * Otherwise the image is scaled down and stored as `lowres`, while the
///   unscaled data moves to (or stays in) `highres` together with the source
///   URL of the slot it came from.
///
/// # Errors
/// Returns `FavIconError::Resize` when there is no image data, or when the
/// data cannot be decoded or re-encoded.
fn resize_icon(mut original: FatFavIcon) -> Result<FatFavIcon, FavIconError> {
    if original.highres.is_some() && original.lowres.is_some() {
        return Ok(original);
    }
    if original.format.as_deref().map(|format| format.starts_with("image/svg")).unwrap_or(false) {
        return Ok(original);
    }

    // At most one slot is filled here. Remember which one provides the pixel
    // data so that its source URL can follow the data into the result.
    let source_is_highres = original.highres.is_some();
    let source = if source_is_highres { &original.highres } else { &original.lowres };
    let data = source.clone().ok_or(FavIconError::Resize)?;

    let cursor = Cursor::new(data);
    let image = ImageReader::new(cursor)
        .with_guessed_format()
        .map_err(|error| {
            tracing::debug!(%error, "reader with_guessed_format");
            FavIconError::Resize
        })?
        .decode()
        .map_err(|error| {
            tracing::debug!(%error, "decode");
            FavIconError::Resize
        })?;

    // Normalise everything that is not already PNG or JPEG to PNG.
    if let Some(format) = original.format.as_deref()
        && format != "image/png"
        && format != "image/jpeg"
    {
        let mut dest = Cursor::new(Vec::new());
        let encoder = PngEncoder::new(&mut dest);
        image.write_with_encoder(encoder).map_err(|_| FavIconError::Resize)?;
        original.format = Some("image/png".to_string());
        original.lowres = Some(dest.into_inner());
    }

    let (original_width, original_height) = image.dimensions();
    if original_width <= LOW_RES && original_height <= LOW_RES {
        return Ok(original);
    }

    let (dest_width, dest_height) = Self::calc_favicon_dimensions(original_width, original_height);
    let resized = imageops::resize(&image, dest_width, dest_height, imageops::FilterType::Triangle);
    let (width, height) = resized.dimensions();
    let resized_raw = resized.into_vec();
    let mut dest = Cursor::new(Vec::new());
    let encoder = PngEncoder::new(&mut dest);
    encoder
        .write_image(&resized_raw, width, height, image::ExtendedColorType::Rgba8)
        .map_err(|error| {
            tracing::debug!(%error, "write image");
            FavIconError::Resize
        })?;

    // The unscaled data becomes the high-resolution rendition. Its URL must
    // come from the same slot as the data, otherwise it would be lost when
    // the data was provided through `highres`.
    let (highres, highres_source_url) = if source_is_highres {
        (original.highres, original.highres_source_url)
    } else {
        (original.lowres, original.lowres_source_url)
    };

    Ok(FatFavIcon {
        feed_id: original.feed_id,
        expires: original.expires,
        format: Some("image/png".into()),
        etag: original.etag,
        // The low-resolution rendition is generated locally and has no URL.
        lowres_source_url: None,
        lowres: Some(dest.into_inner()),
        highres,
        highres_source_url,
    })
}
/// Computes the target size for scaling an image down to fit into
/// `LOW_RES` x `LOW_RES` while keeping its aspect ratio.
///
/// Images that already fit are returned with their original size. Otherwise
/// the longer side becomes `LOW_RES` and the shorter side is scaled
/// proportionally (truncated towards zero), but never below 1 pixel, so that
/// extreme aspect ratios do not yield an invalid zero-sized target.
fn calc_favicon_dimensions(original_width: u32, original_height: u32) -> (u32, u32) {
    if original_width <= LOW_RES && original_height <= LOW_RES {
        return (original_width, original_height);
    }

    let ratio = f64::from(original_width) / f64::from(original_height);
    let target = f64::from(LOW_RES);

    if original_width >= original_height {
        // Landscape or square: width is the limiting side.
        (LOW_RES, ((target / ratio) as u32).max(1))
    } else {
        // Portrait: height is the limiting side.
        (((target * ratio) as u32).max(1), LOW_RES)
    }
}
/// Fetches a new icon for `feed`, trying several sources in order:
///
/// 1. the backend (`api`), if one is given — either its icon data directly,
///    or a download of the source URL it reports;
/// 2. the `icon_url` of the feed;
/// 3. scraping the feed's website.
///
/// Downloaded icons are only accepted when they pass the aspect-ratio check.
/// If every source fails, an icon without any data is returned so that the
/// function never fails.
pub async fn fetch_new_icon(
    api: Option<&RwLock<Box<dyn FeedApi>>>,
    feed: &Feed,
    client: &Client,
    header: HeaderMap<HeaderValue>,
    old_icon: Option<&FavIcon>,
) -> FatFavIcon {
    tracing::debug!(?feed, "fetch_new_icon");

    // Query the backend inside its own scope: the read guard is dropped at the
    // end of this block, so the API lock is not held during the HTTP download
    // that may follow.
    let backend_icon = if let Some(api) = api {
        let backend = api.read().await;
        let response = backend.get_favicon(&feed.feed_id, client, header).await;
        response.ok()
    } else {
        None
    };

    if let Some(favicon) = backend_icon {
        if let (Some(source_url), None) = (&favicon.source_url, &favicon.data) {
            // The backend only told us where the icon lives; fetch it ourselves.
            if let Ok(favicon) = Self::download(source_url, &feed.feed_id, client, old_icon).await {
                tracing::debug!(?source_url, "Favicon downloaded backend source url");
                if favicon.data.as_deref().map(Self::check_aspect_ratio).unwrap_or(false) {
                    return FatFavIcon::from(favicon);
                }
            }
        } else if favicon.data.is_some() {
            tracing::debug!("Favicon downloaded from backend.");
            return FatFavIcon::from(favicon);
        } else {
            tracing::warn!("Favicon from backend doesn't contain data or source url");
        }
    }

    if let Some(icon_url) = &feed.icon_url
        && let Ok(favicon) = Self::download(icon_url, &feed.feed_id, client, old_icon).await
    {
        tracing::debug!(has_data = favicon.data.is_some(), "Favicon downloaded from feed data");
        if favicon.data.as_deref().map(Self::check_aspect_ratio).unwrap_or(false) {
            return FatFavIcon::from(favicon);
        }
    }

    if let Some(fatfavicon) = Self::scrap(feed, client).await {
        tracing::debug!("Favicon scraped from website.");
        return fatfavicon;
    }

    // Nothing usable was found: return an icon without data.
    FatFavIcon {
        feed_id: feed.feed_id.clone(),
        expires: Self::gen_expires(),
        format: None,
        etag: None,
        lowres_source_url: None,
        lowres: None,
        highres: None,
        highres_source_url: None,
    }
}
fn check_aspect_ratio(data: &[u8]) -> bool {
if let Some(image) = ImageReader::new(Cursor::new(data))
.with_guessed_format()
.map_err(|error| tracing::debug!(%error, "read with_guessed_format"))
.ok()
.and_then(|image| image.decode().map_err(|error| tracing::debug!(%error, "decode")).ok())
{
let aspect_ratio = image.width() as f32 / image.height() as f32;
if aspect_ratio > 1.5 {
tracing::debug!("Image is very wide with an apect ratio of {aspect_ratio}. It is probably not a favicon");
false
} else {
true
}
} else {
false
}
}
/// Scrapes the website of `feed` for icons.
///
/// Returns `None` when the feed has no website, when the scraper cannot be
/// created for it, or when no icon for the low-resolution target could be
/// fetched. A high-resolution icon is attached only if one was fetched and
/// its data differs from the low-resolution icon.
async fn scrap(feed: &Feed, client: &Client) -> Option<FatFavIcon> {
    let website = feed.website.as_ref()?;

    let mut scraper = match IconScraper::from_http(website, client).await {
        Ok(scraper) => scraper,
        Err(_) => {
            tracing::warn!(%website, "Failed to scrap icon");
            return None;
        }
    };

    let scraped_icon = match scraper.fetch_best(client, LOW_RES).await {
        Some(icon) => icon,
        None => {
            tracing::warn!(%website, "Failed to download best scrapped icon");
            return None;
        }
    };
    tracing::debug!(%scraped_icon.info.url, "Scraped favicon");

    // Keep the high-resolution candidate only when it is a different image.
    let (highres, highres_source_url) = match scraper.fetch_best(client, HIGH_RES).await {
        Some(highres_icon) if scraped_icon.data != highres_icon.data => {
            (Some(highres_icon.data), Some(Url::new(highres_icon.info.url)))
        }
        _ => (None, None),
    };

    Some(FatFavIcon {
        feed_id: feed.feed_id.clone(),
        expires: Self::gen_expires(),
        format: scraped_icon.mime,
        etag: scraped_icon.etag,
        lowres_source_url: Some(Url::new(scraped_icon.info.url)),
        lowres: Some(scraped_icon.data),
        highres,
        highres_source_url,
    })
}
/// Downloads the icon at `url` for `feed_id`.
///
/// If `old_icon` carries an etag equal to the etag of the response, the
/// response body is not read and the data of `old_icon` is reused with a
/// renewed expiry date. Otherwise the body becomes the icon data and the
/// `Content-Type` header becomes its format.
///
/// # Errors
/// Fails when the request cannot be sent, when the server answers with a
/// 4xx/5xx status, or when the body cannot be read.
async fn download(url: &Url, feed_id: &FeedID, client: &Client, old_icon: Option<&FavIcon>) -> Result<FavIcon, FavIconError> {
    // Reject error responses instead of storing an error page as icon data.
    let res = client.get(url.as_str()).send().await?.error_for_status()?;

    let etag = res
        .headers()
        .get(reqwest::header::ETAG)
        .and_then(|value| value.to_str().ok())
        .map(ToString::to_string);
    let content_type = res
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .map(ToString::to_string);

    // Unchanged on the server: keep the old data and only renew the expiry.
    if let Some(old_icon) = old_icon
        && let Some(old_etag) = old_icon.etag.as_deref()
        && let Some(http_etag) = etag.as_deref()
        && old_etag == http_etag
    {
        return Ok(FavIcon {
            feed_id: old_icon.feed_id.clone(),
            expires: Self::gen_expires(),
            format: old_icon.format.clone(),
            etag,
            source_url: old_icon.source_url.clone(),
            data: old_icon.data.clone(),
        });
    }

    let data = res.bytes().await?.to_vec();
    Ok(FavIcon {
        feed_id: feed_id.clone(),
        expires: Self::gen_expires(),
        format: content_type,
        etag,
        source_url: Some(url.clone()),
        data: Some(data),
    })
}
/// Returns the expiry timestamp for an icon fetched right now: the current
/// UTC time plus `EXPIRES_AFTER_DAYS` days.
fn gen_expires() -> DateTime<Utc> {
    let now = Utc::now();
    let lifetime = Duration::try_days(EXPIRES_AFTER_DAYS).unwrap();
    now + lifetime
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use crate::database::MockDatabaseExt;
use crate::error::{DatabaseError, FeedApiError};
use crate::feed_api::MockFeedApi;
use crate::models::{Feed, FeedID, Url};
use crate::util::feed_parser::{self, ParsedUrl};
use reqwest::{Client, ClientBuilder};
use test_log::test;
use tokio::sync::{RwLock, Semaphore};
/// Builds a `FavIconLoader` backed by mocks, together with a mocked feed API.
///
/// * `feed` — the single feed the mocked database reports; `None` makes
///   reading the feeds fail.
/// * `favicon` — the icon returned both by the mocked API and by the mocked
///   database; `None` makes both lookups fail.
fn mock_loader(feed: Option<Feed>, favicon: Option<FavIcon>) -> (FavIconLoader, RwLock<Box<dyn FeedApi>>) {
    let stored_favicon = favicon.clone();

    let mut feed_api = MockFeedApi::new();
    feed_api
        .expect_get_favicon()
        .returning(move |_, _, _| favicon.clone().ok_or(FeedApiError::Unknown));
    let feed_api: RwLock<Box<dyn FeedApi>> = RwLock::new(Box::new(feed_api));

    let mut db = MockDatabaseExt::new();
    // Only one expectation per method: an additional always-failing
    // `read_favicon` expectation registered first would win every call and
    // hide the `favicon` argument from the loader.
    db.expect_read_favicon()
        .returning(move |_| stored_favicon.clone().ok_or(DatabaseError::Unknown));
    db.expect_insert_favicon().returning(|_| Ok(()));
    db.expect_read_feeds()
        .returning(move || feed.clone().map(|f| vec![f]).ok_or(DatabaseError::Unknown));
    let db: Arc<Box<dyn DatabaseExt>> = Arc::new(Box::new(db));

    let semaphore = Arc::new(Semaphore::new(5));
    let loader = FavIconLoader::new(&db, &semaphore);
    (loader, feed_api)
}
/// Downloads and parses the feed at `url_str` and returns it together with
/// the HTTP client that was used.
///
/// Panics if the URL is invalid, if download or parsing fails, or if the URL
/// resolves to more than one feed.
async fn prepare_feed(url_str: &str) -> (Client, Feed) {
    let client = Client::new();
    let semaphore = Arc::new(Semaphore::new(20));
    let url = Url::parse(url_str).unwrap();
    let feed_id = FeedID::new(url_str);

    let parsed = feed_parser::download_and_parse_feed(&url, &feed_id, None, semaphore, &client)
        .await
        .unwrap();
    let ParsedUrl::SingleFeed(feed) = parsed else {
        panic!("Expected Single Feed")
    };

    (client, *feed)
}
/// Loads the icon for the InvestigateWest feed through the mocked loader and
/// checks the source URL of the resulting icon.
#[test(tokio::test)]
async fn investigatewest() {
    let feed_id = FeedID::new("https://www.investigatewest.org/latest/rss/");
    let website = Url::parse("https://www.investigatewest.org/").ok();
    let feed_url = Url::parse("https://www.investigatewest.org/latest/rss/").ok();
    let icon_url = Url::parse("https://www.investigatewest.org/favicon.png").ok();
    let feed = Feed {
        feed_id: feed_id.clone(),
        label: "InvestigateWest".into(),
        website,
        feed_url,
        icon_url,
        error_count: 0,
        error_message: None,
    };

    let (loader, api) = mock_loader(Some(feed), None);
    let client = Client::new();
    let favicon = loader.get_icon(&feed_id, &api, &client, HeaderMap::new()).await.unwrap();

    let expected = Url::parse("https://www.investigatewest.org/content/images/2025/07/favicon.ico").unwrap();
    assert_eq!(favicon.source_url, Some(expected));
}
/// Scrapes an icon for the feed parsed from the Golem Atom URL.
#[test(tokio::test)]
async fn golem() {
    let (client, golem_feed) = prepare_feed("https://rss.golem.de/rss.php?feed=ATOM1.0").await;

    let scraped = FavIconLoader::scrap(&golem_feed, &client).await.unwrap();

    assert_eq!(scraped.feed_id, golem_feed.feed_id);
    let format = scraped.format.expect("No favicon format");
    assert!(format.starts_with("image/"));
}
/// Scrapes an icon for the Planet GNOME feed, with the website overridden to
/// the https address before scraping.
#[test(tokio::test)]
async fn planet_gnome() {
    let (client, parsed_feed) = prepare_feed("http://planet.gnome.org/rss20.xml").await;
    let gnome_feed = Feed {
        website: Url::parse("https://planet.gnome.org/").ok(),
        ..parsed_feed
    };

    let scraped = FavIconLoader::scrap(&gnome_feed, &client).await.unwrap();

    assert_eq!(scraped.feed_id, gnome_feed.feed_id);
    let format = scraped.format.expect("No favicon format");
    assert!(format.starts_with("image/"));
}
/// Scrapes an icon from reddit.com using a browser-like user agent.
#[test(tokio::test)]
async fn reddit_scraper() {
    let client = ClientBuilder::new()
        .user_agent("Mozilla/5.0 (X11; Linux x86_64; rv:109.0)")
        .build()
        .unwrap();
    let website = Url::parse("http://reddit.com").unwrap();
    let reddit_feed = Feed {
        feed_id: FeedID::new("http://reddit.com"),
        label: String::from("reddit"),
        website: Some(website),
        feed_url: None,
        icon_url: None,
        error_count: 0,
        error_message: None,
    };

    let scraped = FavIconLoader::scrap(&reddit_feed, &client).await.unwrap();

    assert_eq!(scraped.feed_id, reddit_feed.feed_id);
    let format = scraped.format.expect("No favicon format");
    assert!(format.starts_with("image/"));
}
/// Scrapes an icon from golem.de with a default HTTP client.
#[test(tokio::test)]
async fn golem_scraper() {
    let client = Client::new();
    let website = Url::parse("http://golem.de").unwrap();
    let golem_feed = Feed {
        feed_id: FeedID::new("http://golem.de"),
        label: String::from("Golem"),
        website: Some(website),
        feed_url: None,
        icon_url: None,
        error_count: 0,
        error_message: None,
    };

    let scraped = FavIconLoader::scrap(&golem_feed, &client).await.unwrap();

    assert_eq!(scraped.feed_id, golem_feed.feed_id);
    let format = scraped.format.expect("No favicon format");
    assert!(format.starts_with("image/"));
}
/// Scrapes an icon from serienjunkies.de using a Wget user agent and checks
/// that low-resolution data is present.
#[test(tokio::test)]
async fn serienjunkies_scraper() {
    let client = ClientBuilder::new().user_agent("Wget/1.20.3 (linux-gnu)").build().unwrap();
    let website = Url::parse("https://www.serienjunkies.de/news/").unwrap();
    let feed = Feed {
        feed_id: FeedID::new("https://www.serienjunkies.de/news/"),
        label: String::from("Serienjunkies"),
        website: Some(website),
        feed_url: None,
        icon_url: None,
        error_count: 0,
        error_message: None,
    };

    let scraped = FavIconLoader::scrap(&feed, &client).await.unwrap();

    assert_eq!(scraped.feed_id, feed.feed_id);
    assert!(scraped.lowres.is_some());
}
/// Scrapes an icon from spiegel.de using a Wget user agent and checks that
/// low-resolution data is present.
#[test(tokio::test)]
async fn spiegel_scraper() {
    let feed = Feed {
        feed_id: FeedID::new("http://www.spiegel.de/"),
        // The label is not asserted on; it only has to describe the fixture.
        label: String::from("Spiegel"),
        website: Some(Url::parse("http://www.spiegel.de/").unwrap()),
        feed_url: None,
        icon_url: None,
        error_count: 0,
        error_message: None,
    };
    let client = ClientBuilder::new().user_agent("Wget/1.20.3 (linux-gnu)").build().unwrap();

    let favicon = FavIconLoader::scrap(&feed, &client).await.unwrap();

    assert_eq!(favicon.feed_id, feed.feed_id);
    assert!(favicon.lowres.is_some());
}
}