mod cache;
mod download_result;
pub mod metadata;
use self::metadata::LocalMetadata;
use crate::error::FeedParserError;
use crate::feed_api::FeedHeaderMap;
use crate::models::{
self, ArticleID, Category, CategoryID, FatArticle, FavIcon, Feed, FeedID, FeedUpdateResult, LoginData, Marked, PluginCapabilities, Read,
SyncResult, TagID, Url,
};
use crate::util;
use crate::util::favicons::EXPIRES_AFTER_DAYS;
use crate::util::relative_url_evaluater::RelativeUrlEvaluater;
use crate::util::text2html::Text2Html;
use crate::{
feed_api::{FeedApi, FeedApiError, FeedApiResult, Portal},
models::Enclosure,
};
use ammonia::UrlRelative;
use async_trait::async_trait;
use cache::LocalRSSCache;
use chrono::{DateTime, TimeDelta, Utc};
use download_result::{DownloadFailure, DownloadSuccess};
use feed_rs::model::{Entry, Link, MediaContent, MediaObject, Text};
use feed_rs::parser::{Builder as ParserBuilder, ParseFeedError};
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
use reqwest::Client;
use reqwest::header::{HeaderMap, HeaderValue};
use std::collections::{HashMap, HashSet};
use std::str;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
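/// Extra delay applied between successive requests to the same host, to avoid hammering a single server.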
const SAME_HOST_DELAY_MS: u64 = 1500;
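/// Base delay added between any two feed downloads during a sync.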
const GENERAL_DELAY_MS: u64 = 100;
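/// Hosts exempt from the same-host delay, e.g. large platforms that host many independent feeds.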
static DELAY_HOST_BLACKLIST: Lazy<HashSet<&'static str>> = Lazy::new(|| {
let mut set = HashSet::new();
set.insert("youtube.com");
set.insert("medium.com");
set.insert("podcasts.apple.com");
set
});
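/// Tags removed together with their content during sanitization.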
static TAG_BLACKLIST: Lazy<HashSet<&'static str>> = Lazy::new(|| {
let mut tag_blacklist = HashSet::new();
tag_blacklist.insert("script");
tag_blacklist.insert("style");
tag_blacklist
});
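/// Matches HTML character references (named, decimal or hexadecimal) to detect entity-encoded text.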
static HTML_ENTITY_REGEX: Lazy<Regex> = Lazy::new(|| {
RegexBuilder::new(r#"&(?:[a-z\d]+|#\d+|#x[a-f\d]+)"#)
.case_insensitive(true)
.build()
.expect("HTML_ENTITY_REGEX regex")
});
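// Map feed-rs parse errors onto the feed API error type.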
impl From<ParseFeedError> for FeedApiError {
fn from(error: ParseFeedError) -> FeedApiError {
match error {
ParseFeedError::ParseError(e) => FeedApiError::Api {
message: format!("Error parsing feed: {e}"),
},
ParseFeedError::IoError(e) => FeedApiError::IO(e),
ParseFeedError::JsonSerde(e) => FeedApiError::Json {
source: e,
json: "Unavailable".into(),
},
ParseFeedError::JsonUnsupportedVersion(e) => FeedApiError::Api {
message: format!("Unsupported Json feed: {e}"),
},
ParseFeedError::XmlReader(e) => FeedApiError::Api {
message: format!("Error parsing xml: {e}"),
},
}
}
}
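/// Backend that downloads and parses RSS/Atom/JSON feeds directly, without a remote service account.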
pub struct LocalRSS {
portal: Arc<Box<dyn Portal>>,
cache: Arc<RwLock<LocalRSSCache>>,
}
impl LocalRSS {
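/// Pick the article URL from the entry links: prefer the `rel="alternate"` link, otherwise the first link that parses as a valid URL.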
fn select_article_url(links: &[Link]) -> Option<Url> {
links
.iter()
.find(|l| l.rel.as_deref() == Some("alternate"))
.and_then(|l| Url::parse(&l.href).ok())
.or_else(|| links.iter().find_map(|link| Url::parse(&link.href).ok()))
}
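/// Sanitize a feed text element in place, but only if it is not declared as plain text or looks like it contains HTML markup or entities.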
fn sanitize_text(text: &mut Text, base_url: Option<&Url>) {
if text.content_type.as_str() != "text/plain" || ammonia::is_html(&text.content) || HTML_ENTITY_REGEX.is_match(&text.content) {
text.content = Self::sanitize_string(&text.content, base_url);
}
}
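/// Clean an HTML fragment with ammonia: drop `script`/`style` including their content, additionally allow `video`, `audio`, `source` and `iframe`, and resolve relative URLs against `base_url` when one is provided.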
fn sanitize_string(text: &str, base_url: Option<&Url>) -> String {
let mut sanitizer = ammonia::Builder::default();
sanitizer.clean_content_tags(TAG_BLACKLIST.clone());
sanitizer.add_tags(&["video", "audio", "source", "iframe"]);
sanitizer.add_tag_attributes("source", &["src", "type"]);
sanitizer.add_tag_attributes("iframe", &["src"]);
sanitizer.add_generic_attributes(&["id"]);
if let Some(base_url) = base_url.cloned() {
let eval = RelativeUrlEvaluater::new(base_url);
sanitizer.url_relative(UrlRelative::Custom(Box::new(eval)));
}
sanitizer.clean(text).to_string()
}
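/// Convert a feed-rs entry into a `FatArticle` plus its enclosures.
/// Returns `None` when the article is already stored and the entry brings no newer update.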
fn convert_entry(entry: Entry, feed_id: &FeedID, website: Option<&Url>, portal: Arc<Box<dyn Portal>>) -> Option<(FatArticle, Vec<Enclosure>)> {
let Entry {
id,
updated,
mut title,
authors,
content,
links,
summary: mut entry_summary,
categories: _,
contributors: _,
published,
source: _,
rights: _,
media,
language: _,
base: _,
} = entry;
let mut article_id = ArticleID::new(&id);
let mut local_article = portal.get_article(&article_id).ok();
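// If an article with this id already belongs to a different feed, disambiguate the id by appending the feed id.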
if local_article.as_ref().map(|a| &a.feed_id != feed_id).unwrap_or(false) {
tracing::debug!(%article_id, "article already published by another feed");
article_id = ArticleID::from_owned(format!("{id}:{feed_id}"));
local_article = portal.get_article(&article_id).ok();
}
let article_url = Self::select_article_url(&links);
let mut marked = Marked::Unmarked;
let mut unread = Read::Unread;
if let Some(local_article) = local_article {
if let Some(updated) = updated {
if local_article.date >= updated {
return None;
}
} else {
return None;
}
marked = local_article.marked;
unread = local_article.unread;
}
if let Some(text) = title.as_mut() {
Self::sanitize_text(text, website);
}
if let Some(text) = entry_summary.as_mut() {
Self::sanitize_text(text, website);
}
let html = match content {
Some(content) => {
let xml_base = content.src.as_ref().map(|l| l.href.clone()).and_then(|xb| Url::parse(&xb).ok());
let xml_base = xml_base.or_else(|| website.cloned());
content.body.map(|body| Self::sanitize_string(&body, xml_base.as_ref()))
}
None => match &entry_summary {
Some(original_summary) => Some(original_summary.content.clone()),
None => media.first().and_then(|m| m.description.clone()).map(|mut text| {
Self::sanitize_text(&mut text, website);
text.content
}),
},
};
let html = html.map(|s| if !ammonia::is_html(&s) { Text2Html::process(&s) } else { s });
let plain_text = match &entry_summary {
Some(summary) => Some(util::html2text::html2text(&summary.content)),
None => html.as_deref().map(util::html2text::html2text),
};
let summary = plain_text.as_deref().map(util::html2text::text2summary);
let mut thumbnail_url = media
.iter()
.filter_map(|media| {
let MediaObject { content, thumbnails, .. } = media;
let attached_images: Vec<Url> = content
.iter()
.filter_map(|content| {
let MediaContent { url, content_type, .. } = content;
content_type.as_ref().and_then(|mime| {
if mime.ty() == "image" {
url.as_ref().map(|url| Url::new(url.clone()))
} else {
None
}
})
})
.collect();
let thumbnails = thumbnails.iter().map(|t| t.image.uri.as_str()).collect::<Vec<&str>>();
if let Some(&thumbnail) = thumbnails.first() {
Some(thumbnail.to_owned())
} else {
attached_images.first().map(|first_image| first_image.to_string())
}
})
.next();
if thumbnail_url.is_none() {
thumbnail_url = html.as_deref().and_then(crate::util::thumbnail::extract_thumbnail);
}
let mut enclosures = media
.into_iter()
.flat_map(|media| {
let MediaObject {
content,
mut title,
duration: object_duration,
mut description,
thumbnails,
..
} = media;
if let Some(text) = description.as_mut() {
Self::sanitize_text(text, website);
}
if let Some(text) = title.as_mut() {
Self::sanitize_text(text, website);
}
let description = description.map(|t| t.content);
let title = title.map(|t| t.content);
let thumbnail_url = thumbnails.first().map(|thumb| thumb.image.uri.clone());
let mut default_url = None;
content
.into_iter()
.filter_map(|content| {
let MediaContent {
url,
content_type,
duration: content_duration,
width,
height,
size,
..
} = content;
url.map(|url| {
let url = Url::new(url);
let duration = content_duration.or(object_duration);
let duration = duration.as_ref().map(Duration::as_secs).map(|secs| secs as i32);
let enclosure = Enclosure {
article_id: article_id.clone(),
url: url.clone(),
mime_type: content_type.as_ref().map(|mime| mime.to_string()),
title: title.clone(),
position: None,
summary: description.clone(),
thumbnail_url: thumbnail_url.clone(),
filesize: size.map(|s| s as i32),
width: width.map(|w| w as i32),
height: height.map(|h| h as i32),
duration,
framerate: None,
alternative: default_url.clone(),
is_default: false,
};
if default_url.is_none() {
default_url = Some(url);
}
enclosure
})
})
.collect::<Vec<Enclosure>>()
})
.collect::<Vec<Enclosure>>();
if let Some(largest_video) = enclosures.iter_mut().max_by_key(|enclosure| enclosure.height) {
largest_video.is_default = true;
}
let date = published.or(updated).unwrap_or_else(Utc::now);
let updated = if let Some(updated) = updated
&& updated > date
{
Some(updated)
} else {
None
};
let article = FatArticle {
article_id,
feed_id: feed_id.clone(),
title: title.map(|t| t.content),
url: article_url,
author: authors
.iter()
.filter_map(|person| if person.name.is_empty() { None } else { Some(person.name.clone()) })
.next(),
date,
synced: Utc::now(),
updated,
direction: None,
marked,
unread,
html,
scraped_content: None,
summary,
plain_text,
thumbnail_url,
};
Some((article, enclosures))
}
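/// Download a feed and parse it with feed-rs.
/// Returns `Ok(None)` when the download is skipped: the cache-control window is still active, the server answered 304, or the ETag is unchanged.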
async fn download_and_parse_feed(
url: &Url,
etag: Option<&str>,
ignore_until: Option<DateTime<Utc>>,
client: &Client,
header: Option<HeaderMap<HeaderValue>>,
) -> Result<Option<DownloadSuccess>, DownloadFailure> {
if ignore_until.map(|dt| dt > Utc::now()).unwrap_or(false) {
tracing::debug!(%url, ?ignore_until, "ignore download");
return Ok(None);
}
let mut request_builder = client
.get(url.as_str())
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/json"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/feed+json"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/rss+xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/atom+xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("text/xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("text/html"));
if let Some(header) = header {
request_builder = request_builder.headers(header)
}
let feed_response = match request_builder.send().await {
Ok(response) => response,
Err(error) => {
tracing::error!(%url, %error, "Downloading feed failed");
return Err(DownloadFailure {
msg: format!("Downloading feed failed: {url} - {error}"),
error: FeedApiError::Network(error),
retry_after: None,
});
}
};
let response_status = feed_response.status();
if !response_status.is_success() {
if response_status == reqwest::StatusCode::NOT_MODIFIED {
return Ok(None);
}
tracing::error!(%url, %response_status, "Downloading feed failed");
let retry_after = if response_status == reqwest::StatusCode::TOO_MANY_REQUESTS {
feed_response
.headers()
.get(reqwest::header::RETRY_AFTER)
.and_then(|hv| hv.to_str().ok())
.and_then(|str| str.parse::<u64>().ok())
} else {
None
};
let error = if let Err(error) = feed_response.error_for_status() {
FeedApiError::Network(error)
} else {
FeedApiError::Unknown
};
return Err(DownloadFailure {
msg: format!("Downloading feed failed: {response_status} - {url}"),
error,
retry_after,
});
}
let new_etag = feed_response
.headers()
.get(reqwest::header::ETAG)
.and_then(|hv| hv.to_str().ok())
.map(|etag| etag.to_string());
let cache_control = feed_response
.headers()
.get(reqwest::header::CACHE_CONTROL)
.and_then(|hv| hv.to_str().ok())
.and_then(cache_control::CacheControl::from_header);
if let (Some(old_etag), Some(new_etag)) = (etag, new_etag.as_deref())
&& old_etag == new_etag
{
return Ok(None);
}
let content_location = feed_response
.headers()
.get(reqwest::header::CONTENT_LOCATION)
.and_then(|hv| hv.to_str().ok())
.and_then(|cl| Url::parse(cl).ok());
let result_bytes = match feed_response.bytes().await {
Ok(result_bytes) => result_bytes,
Err(error) => {
tracing::error!(%url, %error, "Reading response as string failed");
return Err(DownloadFailure {
msg: format!("Reading response as string failed: {url} - {error}"),
error: FeedApiError::Network(error),
retry_after: None,
});
}
};
let feed_base_url = content_location.unwrap_or_else(|| url.clone());
let parser = ParserBuilder::new().base_uri(Some(feed_base_url)).sanitize_content(false).build();
let parse_result = parser.parse(result_bytes.as_ref());
let parsed_feed = match parse_result {
Ok(parsed_feed) => parsed_feed,
Err(error) => {
tracing::error!(%url, %error, "Couldn't parse feed content");
return Err(DownloadFailure {
msg: format!("Couldn't parse feed content: {url} - {error}"),
error: FeedApiError::ParseFeed(FeedParserError::Feed),
retry_after: None,
});
}
};
Ok(Some(DownloadSuccess {
feed: parsed_feed,
etag: new_etag,
cache_control,
}))
}
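/// Extract the feed's host name (with a leading "www.", "www1." or "www2." stripped) for grouping requests per host.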
fn feed_get_host_url(feed: &Feed) -> Option<(FeedID, String)> {
let url = feed.feed_url.as_ref()?;
let host = url.host_str()?;
let host = host
.strip_prefix("www.")
.or_else(|| host.strip_prefix("www1."))
.or_else(|| host.strip_prefix("www2."))
.unwrap_or(host);
Some((feed.feed_id.clone(), host.to_string()))
}
}
#[async_trait]
impl FeedApi for LocalRSS {
fn features(&self) -> FeedApiResult<PluginCapabilities> {
Ok(PluginCapabilities::ADD_REMOVE_FEEDS
| PluginCapabilities::SUPPORT_CATEGORIES
| PluginCapabilities::SUPPORT_SUBCATEGORIES
| PluginCapabilities::MODIFY_CATEGORIES
| PluginCapabilities::SUPPORT_TAGS
| PluginCapabilities::EDIT_FEED_URLS)
}
fn has_user_configured(&self) -> FeedApiResult<bool> {
Ok(true)
}
async fn is_reachable(&self, _client: &Client) -> FeedApiResult<bool> {
Err(FeedApiError::Unsupported)
}
async fn is_logged_in(&self, _client: &Client) -> FeedApiResult<bool> {
Ok(true)
}
async fn user_name(&self) -> Option<String> {
None
}
async fn get_login_data(&self) -> Option<LoginData> {
Some(LoginData::None(LocalMetadata::get_id()))
}
async fn login(&mut self, _data: LoginData, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn logout(&mut self, _client: &Client) -> FeedApiResult<()> {
self.cache.read().await.delete()?;
Ok(())
}
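// Download all feeds concurrently, staggering requests so the same host is not hit in rapid succession, and convert new or updated entries into articles.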
async fn initial_sync(&self, client: &Client, custom_header: FeedHeaderMap) -> FeedApiResult<SyncResult> {
let feeds = self.portal.get_feeds()?;
let semaphore = self.portal.get_download_semaphore();
let feed_hash_map = Arc::new(RwLock::new(
feeds
.clone()
.into_iter()
.map(|feed| (feed.feed_id.clone(), feed))
.collect::<HashMap<FeedID, Feed>>(),
));
let mut delays: HashMap<FeedID, Duration> = HashMap::new();
let mut host_counts: HashMap<String, u64> = HashMap::new();
for (i, (feed_id, host)) in feeds.iter().filter_map(Self::feed_get_host_url).enumerate() {
let host_is_blacklisted = DELAY_HOST_BLACKLIST.contains(host.as_str());
let count = host_counts.entry(host).or_default();
*count += 1;
let mut delay_ms = i as u64 * GENERAL_DELAY_MS;
if *count > 1 && !host_is_blacklisted {
let delay = *count - 1;
delay_ms += delay * SAME_HOST_DELAY_MS;
tracing::debug!(%feed_id, %delay_ms, "delaying request");
}
delays.insert(feed_id, Duration::from_millis(delay_ms));
}
let mut task_handles = Vec::new();
for feed in feeds.into_iter() {
let client = client.clone();
let portal = self.portal.clone();
let hash_map = feed_hash_map.clone();
let semaphore = Arc::clone(&semaphore);
let cache = self.cache.clone();
let header = custom_header.get(&feed.feed_id).cloned();
let delay = delays.get(&feed.feed_id).copied();
task_handles.push(tokio::spawn(async move {
let Some(url) = feed.feed_url.clone() else {
tracing::warn!(%feed.feed_id, "No feed url for feed");
if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
old_feed.error_count += 1;
old_feed.error_message = Some(format!("No feed url for feed: '{}'", feed.feed_id));
}
return Err(FeedApiError::ParseFeed(FeedParserError::NoUrl));
};
if let Some(delay) = delay {
tokio::time::sleep(delay).await;
}
let permit = match semaphore.acquire().await {
Ok(permit) => permit,
Err(error) => {
tracing::error!(%url, %error, "couldn't acquire download permit for feed");
return Err(error.into());
}
};
let old_etag = cache.read().await.get_etag(&feed.feed_id);
let ignore_until = cache.read().await.get_cache_control(&feed.feed_id);
let parsed_feed = match Self::download_and_parse_feed(&url, old_etag.as_deref(), ignore_until, &client, header).await {
Ok(Some(DownloadSuccess {
feed: parsed_feed,
etag,
cache_control,
})) => {
cache.write().await.set_etag(&feed.feed_id, etag);
cache
.write()
.await
.set_cache_control(&feed.feed_id, cache_control.and_then(|cc| cc.max_age).map(|max_age| max_age.as_secs()));
parsed_feed
}
Ok(None) => {
tracing::debug!(%feed.feed_id, "skipped downloading");
if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
old_feed.error_count = 0;
old_feed.error_message = None;
}
return Ok((Vec::new(), Vec::new()));
}
Err(DownloadFailure { msg, error, retry_after }) => {
cache.write().await.set_cache_control(&feed.feed_id, retry_after);
if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
old_feed.error_count += 1;
old_feed.error_message = Some(msg);
}
cache.write().await.set_etag(&feed.feed_id, None);
return Err(error);
}
};
drop(permit);
let updated_feed = Feed::from_feed_rs(&parsed_feed, None, &url);
if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
old_feed.icon_url = updated_feed.icon_url;
old_feed.website = updated_feed.website;
old_feed.error_count = 0;
old_feed.error_message = None;
}
let conversion_result = parsed_feed
.entries
.into_iter()
.rev()
.filter_map(|e| Self::convert_entry(e, &feed.feed_id, feed.website.as_ref(), portal.clone()))
.collect::<Vec<_>>();
let (articles, enclosures): (Vec<FatArticle>, Vec<Vec<Enclosure>>) = conversion_result.into_iter().unzip();
let enclosures = enclosures.into_iter().flatten().collect();
Ok((articles, enclosures))
}));
}
let mut articles: Vec<FatArticle> = Vec::new();
let mut enclosures: Vec<Enclosure> = Vec::new();
let result_vec = futures::future::join_all(task_handles).await;
_ = self.cache.write().await.write();
for (mut feed_articles, mut feed_enclosures) in result_vec.into_iter().flatten().flatten() {
articles.append(&mut feed_articles);
enclosures.append(&mut feed_enclosures);
}
let feeds = Arc::into_inner(feed_hash_map)
.map(|lock| lock.into_inner())
.map(|map| map.into_values().collect())
.and_then(util::vec_to_option);
Ok(SyncResult {
feeds,
categories: None,
feed_mappings: None,
category_mappings: None,
tags: None,
headlines: None,
articles: util::vec_to_option(articles),
enclosures: util::vec_to_option(enclosures),
taggings: None,
})
}
async fn sync(&self, client: &Client, custom_header: FeedHeaderMap) -> FeedApiResult<SyncResult> {
self.initial_sync(client, custom_header).await
}
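// Refresh a single feed: download it unconditionally (no etag / cache-control short-circuit) and convert its entries.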
async fn fetch_feed(&self, feed_id: &FeedID, client: &Client, custom_header: HeaderMap<HeaderValue>) -> FeedApiResult<FeedUpdateResult> {
let feeds = self.portal.get_feeds()?;
let mut feed = feeds.into_iter().find(|f| &f.feed_id == feed_id).ok_or(FeedApiError::Unknown)?;
let url = feed.feed_url.clone().ok_or(FeedApiError::Unknown)?;
let parsed_feed = match Self::download_and_parse_feed(&url, None, None, client, Some(custom_header)).await {
Ok(Some(DownloadSuccess {
feed: parsed_feed,
etag,
cache_control,
})) => {
self.cache.write().await.set_etag(&feed.feed_id, etag);
self.cache
.write()
.await
.set_cache_control(&feed.feed_id, cache_control.and_then(|cc| cc.max_age).map(|max_age| max_age.as_secs()));
parsed_feed
}
Ok(None) => {
return Ok(FeedUpdateResult {
feed: None,
articles: None,
enclosures: None,
taggings: None,
});
}
Err(DownloadFailure { msg: _, error, retry_after }) => {
self.cache.write().await.set_cache_control(&feed.feed_id, retry_after);
return Err(error);
}
};
_ = self.cache.write().await.write();
let updated_feed = Feed::from_feed_rs(&parsed_feed, None, &url);
feed.icon_url = updated_feed.icon_url;
feed.website = updated_feed.website;
feed.error_count = 0;
feed.error_message = None;
let conversion_result = parsed_feed
.entries
.into_iter()
.rev()
.map(|e| Self::convert_entry(e, &feed.feed_id, feed.website.as_ref(), self.portal.clone()))
.collect::<Vec<_>>();
let (articles, enclosures): (Vec<FatArticle>, Vec<Vec<Enclosure>>) = conversion_result.into_iter().flatten().unzip();
let enclosures = enclosures.into_iter().flatten().collect();
Ok(FeedUpdateResult {
feed: Some(feed),
articles: util::vec_to_option(articles),
enclosures: util::vec_to_option(enclosures),
taggings: None,
})
}
async fn set_article_read(&self, _articles: &[ArticleID], _read: models::Read, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn set_article_marked(&self, _articles: &[ArticleID], _marked: models::Marked, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn set_feed_read(&self, _feeds: &[FeedID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn set_category_read(&self, _categories: &[CategoryID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn set_tag_read(&self, _tags: &[TagID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn set_all_read(&self, _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn add_feed(
&self,
url: &Url,
title: Option<String>,
category_id: Option<CategoryID>,
client: &Client,
) -> FeedApiResult<(Feed, Option<Category>)> {
let feed_response = client
.get(url.as_str())
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/json"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/feed+json"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/rss+xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("application/atom+xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("text/xml"))
.header(reqwest::header::ACCEPT, HeaderValue::from_static("text/html"))
.send()
.await?
.error_for_status()?;
let result_bytes = feed_response
.bytes()
.await
.inspect_err(|error| tracing::error!(%url, %error, "Reading response as bytes failed"))?;
let parser = ParserBuilder::new().base_uri(Some(url)).build();
let feed = parser.parse(result_bytes.as_ref())?;
let feed = Feed::from_feed_rs(&feed, title, url);
let categories = self.portal.get_categories()?;
let category = categories.iter().find(|c| Some(&c.category_id) == category_id.as_ref()).cloned();
Ok((feed, category))
}
async fn remove_feed(&self, id: &FeedID, _client: &Client) -> FeedApiResult<()> {
let mut cache = self.cache.write().await;
cache.set_etag(id, None);
cache.set_cache_control(id, None);
_ = cache.write();
Ok(())
}
async fn move_feed(&self, _feed_id: &FeedID, _from: &CategoryID, _to: &CategoryID, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn rename_feed(&self, feed_id: &FeedID, _new_title: &str, _client: &Client) -> FeedApiResult<FeedID> {
Ok(feed_id.clone())
}
async fn edit_feed_url(&self, _feed_id: &FeedID, _new_url: &str, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn add_category<'a>(&self, _title: &str, _parent: Option<&'a CategoryID>, _client: &Client) -> FeedApiResult<CategoryID> {
let uuid = uuid::Uuid::new_v4();
Ok(CategoryID::from_owned(uuid.to_string()))
}
async fn remove_category(&self, id: &CategoryID, _remove_children: bool, _client: &Client) -> FeedApiResult<()> {
let mappings = self.portal.get_feed_mappings()?;
let mut cache = self.cache.write().await;
for mapping in mappings {
if &mapping.category_id == id {
cache.set_etag(&mapping.feed_id, None);
cache.set_cache_control(&mapping.feed_id, None);
}
}
_ = cache.write();
Ok(())
}
async fn rename_category(&self, id: &CategoryID, _new_title: &str, _client: &Client) -> FeedApiResult<CategoryID> {
Ok(id.clone())
}
async fn move_category(&self, _id: &CategoryID, _parent: &CategoryID, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn import_opml(&self, _opml: &str, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn add_tag(&self, title: &str, _client: &Client) -> FeedApiResult<TagID> {
Ok(TagID::new(title))
}
async fn remove_tag(&self, _id: &TagID, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn rename_tag(&self, id: &TagID, _new_title: &str, _client: &Client) -> FeedApiResult<TagID> {
Ok(id.clone())
}
async fn tag_article(&self, _article_id: &ArticleID, _tag_id: &TagID, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
async fn untag_article(&self, _article_id: &ArticleID, _tag_id: &TagID, _client: &Client) -> FeedApiResult<()> {
Ok(())
}
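// Re-download the feed (honoring etag and cache-control) to pick up a fresh icon URL, falling back to the stored feed data.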
async fn get_favicon(&self, feed_id: &FeedID, client: &Client, custom_header: HeaderMap<HeaderValue>) -> FeedApiResult<FavIcon> {
let Some(feed) = self.portal.get_feeds()?.into_iter().find(|f| &f.feed_id == feed_id) else {
return Err(FeedApiError::Unknown);
};
if let Some(feed_url) = feed.feed_url.as_ref() {
let old_etag = self.cache.read().await.get_etag(feed_id);
let ignore_until = self.cache.read().await.get_cache_control(feed_id);
let feed = Self::download_and_parse_feed(feed_url, old_etag.as_deref(), ignore_until, client, Some(custom_header))
.await
.map_err(|DownloadFailure { error, .. }| error)?
.map(|DownloadSuccess { feed: feed_rs, .. }| Feed::from_feed_rs(&feed_rs, None, feed_url))
.unwrap_or(feed);
_ = self.cache.write().await.write();
Ok(FavIcon {
feed_id: feed_id.clone(),
expires: Utc::now() + TimeDelta::try_days(EXPIRES_AFTER_DAYS).unwrap(),
format: None,
etag: None,
source_url: feed.icon_url,
data: None,
})
} else if let Some(icon_url) = feed.icon_url {
Ok(FavIcon {
feed_id: feed_id.clone(),
expires: Utc::now() + TimeDelta::try_days(EXPIRES_AFTER_DAYS).unwrap(),
format: None,
etag: None,
source_url: Some(icon_url),
data: None,
})
} else {
Err(FeedApiError::Unknown)
}
}
}
#[cfg(test)]
mod tests {
use crate::models::Url;
use test_log::test;
#[test]
fn clean_html_entity() {
let text = "Tired of Google's Tracking? Motorola's GrapheneOS-Powered Phones Are Coming";
let is_html = super::HTML_ENTITY_REGEX.is_match(text);
assert!(is_html);
let res = super::LocalRSS::sanitize_string(text, None);
assert_eq!(res, "Tired of Google's Tracking? Motorola's GrapheneOS-Powered Phones Are Coming");
}
#[test]
fn clean_twig233() {
let html = r#"
<blockquote>
<p>Exhibit gets animated!</p>
<p>The latest release adds animation playback and armature visualization, making it easier to preview rigged 3D models directly in GNOME. All thanks to F3D’s latest improvements.</p>
<p>Get it on <a href="https://flathub.org/apps/io.github.nokse22.Exhibit">Flathub</a></p>
<p>Checkout <a href="https://f3d.app">F3D</a></p>
<p><img width="924" height="611" loading="lazy" decoding="async" src="/_astro/exhibit.yGW-D87q_oC986.webp" ></p>
<p></p>
<p><video controls><source src="/posts/2026/01/twig-233/exhibit.mp4" type="video/mp4"></video></p>
</blockquote>
"#;
let url = Url::parse("https://thisweek.gnome.org/").unwrap();
let res = super::LocalRSS::sanitize_string(html, Some(&url));
assert!(res.contains("<video><source src="));
}
#[test]
fn empty_table_cell() {
let html = r#"
<table>
<tr>
<th>Company</th>
<th></th>
<th>Country</th>
</tr>
<tr>
<td>Alfreds Futterkiste</td>
<td>Maria Anders</td>
<td>Germany</td>
</tr>
<tr>
<td>Centro comercial Moctezuma</td>
<td>Francisco Chang</td>
<td>Mexico</td>
</tr>
</table>
"#;
let res = super::LocalRSS::sanitize_string(html, None);
assert!(res.contains("<th></th>"));
}
}