mod cache;
mod download_result;
pub mod metadata;

use self::metadata::LocalMetadata;
use crate::error::FeedParserError;
use crate::feed_api::FeedHeaderMap;
use crate::models::{
    self, ArticleID, Category, CategoryID, FatArticle, FavIcon, Feed, FeedID, FeedUpdateResult, LoginData, Marked, PluginCapabilities, Read,
    SyncResult, TagID, Url,
};
use crate::util;
use crate::util::favicons::EXPIRES_AFTER_DAYS;
use crate::util::relative_url_evaluater::RelativeUrlEvaluater;
use crate::util::text2html::Text2Html;
use crate::{
    feed_api::{FeedApi, FeedApiError, FeedApiResult, Portal},
    models::Enclosure,
};
use ammonia::UrlRelative;
use async_trait::async_trait;
use cache::LocalRSSCache;
use chrono::{DateTime, TimeDelta, Utc};
use download_result::{DownloadFailure, DownloadSuccess};
use feed_rs::model::{Entry, Link, MediaContent, MediaObject, Text};
use feed_rs::parser::{Builder as ParserBuilder, ParseFeedError};
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
use reqwest::Client;
use reqwest::header::{HeaderMap, HeaderValue};
use std::collections::{HashMap, HashSet};
use std::str;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;

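// Delays used to stagger feed downloads (see `initial_sync`): every feed is
// offset by a small general delay, and feeds sharing a host get extra spacing.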
const SAME_HOST_DELAY_MS: u64 = 1500;
const GENERAL_DELAY_MS: u64 = 100;

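/// Hosts that are exempt from the extra same-host delay.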
static DELAY_HOST_BLACKLIST: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    let mut set = HashSet::new();
    set.insert("youtube.com");
    set.insert("medium.com");
    set.insert("podcasts.apple.com");
    set
});

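/// Tags whose entire content is removed during sanitization.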
static TAG_BLACKLIST: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    let mut tag_blacklist = HashSet::new();
    tag_blacklist.insert("script");
    tag_blacklist.insert("style");
    tag_blacklist
});

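/// Detects named (`&amp`), decimal (`&#8217`) and hexadecimal (`&#x2019`)
/// HTML entities; the trailing semicolon is not required.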
static HTML_ENTITY_REGEX: Lazy<Regex> = Lazy::new(|| {
    RegexBuilder::new(r#"&(?:[a-z\d]+|#\d+|#x[a-f\d]+)"#)
        .case_insensitive(true)
        .build()
        .expect("HTML_ENTITY_REGEX regex")
});

impl From<ParseFeedError> for FeedApiError {
    fn from(error: ParseFeedError) -> FeedApiError {
        match error {
            ParseFeedError::ParseError(e) => FeedApiError::Api {
                message: format!("Error parsing feed: {e}"),
            },
            ParseFeedError::IoError(e) => FeedApiError::IO(e),
            ParseFeedError::JsonSerde(e) => FeedApiError::Json {
                source: e,
                json: "Unavailable".into(),
            },
            ParseFeedError::JsonUnsupportedVersion(e) => FeedApiError::Api {
                message: format!("Unsupported Json feed: {e}"),
            },
            ParseFeedError::XmlReader(e) => FeedApiError::Api {
                message: format!("Error parsing xml: {e}"),
            },
        }
    }
}

pub struct LocalRSS {
    portal: Arc<Box<dyn Portal>>,
    cache: Arc<RwLock<LocalRSSCache>>,
}

impl LocalRSS {
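    /// Picks the article link from the entry's links: prefer `rel="alternate"`,
    /// otherwise fall back to the first parseable URL.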
    fn select_article_url(links: &[Link]) -> Option<Url> {
        let mut url = links
            .iter()
            .find(|l| l.rel.as_deref() == Some("alternate"))
            .and_then(|l| Url::parse(&l.href).ok());

        if url.is_none() {
            for link in links {
                if let Ok(parsed_url) = Url::parse(&link.href) {
                    url = Some(parsed_url);
                    break;
                }
            }
        }

        url
    }

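    /// Sanitizes a feed text node in place. Plain-text nodes are left alone
    /// unless they actually contain HTML markup or entities.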
    fn sanitize_text(text: &mut Text, base_url: Option<&Url>) {
        if text.content_type.as_str() != "text/plain" || ammonia::is_html(&text.content) || HTML_ENTITY_REGEX.is_match(&text.content) {
            text.content = Self::sanitize_string(&text.content, base_url);
        }
    }

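    /// Runs ammonia over `text`: strips blacklisted tags and their content,
    /// additionally allows media tags (`video`, `audio`, `source`, `iframe`)
    /// and resolves relative URLs against `base_url` when one is available.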
    fn sanitize_string(text: &str, base_url: Option<&Url>) -> String {
        let mut sanitizer = ammonia::Builder::default();
        sanitizer.clean_content_tags(TAG_BLACKLIST.clone());
        sanitizer.add_tags(&["video", "audio", "source", "iframe"]);
        sanitizer.add_tag_attributes("source", &["src", "type"]);
        sanitizer.add_tag_attributes("iframe", &["src"]);
        sanitizer.add_generic_attributes(&["id"]);

        if let Some(base_url) = base_url.cloned() {
            let eval = RelativeUrlEvaluater::new(base_url);
            sanitizer.url_relative(UrlRelative::Custom(Box::new(eval)));
        }
        sanitizer.clean(text).to_string()
    }

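    /// Converts a feed-rs entry into a `FatArticle` plus its enclosures.
    /// Returns `None` when a stored copy of the article exists and the entry
    /// carries no newer `updated` timestamp.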
    fn convert_entry(entry: Entry, feed_id: &FeedID, website: Option<&Url>, portal: Arc<Box<dyn Portal>>) -> Option<(FatArticle, Vec<Enclosure>)> {
        let Entry {
            id,
            updated,
            mut title,
            authors,
            content,
            links,
            summary: mut entry_summary,
            categories: _,
            contributors: _,
            published,
            source: _,
            rights: _,
            media,
            language: _,
            base: _,
        } = entry;

        let mut article_id = ArticleID::new(&id);
        let mut local_article = portal.get_article(&article_id).ok();

        if local_article.as_ref().map(|a| &a.feed_id != feed_id).unwrap_or(false) {
            tracing::debug!(%article_id, "article already published by another feed");
            article_id = ArticleID::from_owned(format!("{id}:{feed_id}"));
            local_article = portal.get_article(&article_id).ok();
        }

        let article_url = Self::select_article_url(&links);

        let mut marked = Marked::Unmarked;
        let mut unread = Read::Unread;

        if let Some(local_article) = local_article {
            // Skip entries that are no newer than the stored article.
            if let Some(updated) = updated {
                if local_article.date >= updated {
                    return None;
                }
            } else {
                return None;
            }

            marked = local_article.marked;
            unread = local_article.unread;
        }

        if let Some(text) = title.as_mut() {
            Self::sanitize_text(text, website);
        }
        if let Some(text) = entry_summary.as_mut() {
            Self::sanitize_text(text, website);
        }

        let html = match content {
            Some(content) => {
                let xml_base = content.src.as_ref().map(|l| l.href.clone()).and_then(|xb| Url::parse(&xb).ok());
                let xml_base = xml_base.or_else(|| website.cloned());
                content.body.map(|body| Self::sanitize_string(&body, xml_base.as_ref()))
            }
            None => match &entry_summary {
                Some(original_summary) => Some(original_summary.content.clone()),
                None => media.first().and_then(|m| m.description.clone()).map(|mut text| {
                    Self::sanitize_text(&mut text, website);
                    text.content
                }),
            },
        };
        let html = html.map(|s| if !ammonia::is_html(&s) { Text2Html::process(&s) } else { s });

        let plain_text = match &entry_summary {
            Some(summary) => Some(util::html2text::html2text(&summary.content)),
            None => html.as_deref().map(util::html2text::html2text),
        };
        let summary = plain_text.as_deref().map(util::html2text::text2summary);

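        // Prefer an explicit media thumbnail, then the first attached image,
        // and finally an image extracted from the article HTML.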
        let mut thumbnail_url = media
            .iter()
            .filter_map(|media| {
                let MediaObject { content, thumbnails, .. } = media;
                let attached_images: Vec<Url> = content
                    .iter()
                    .filter_map(|content| {
                        let MediaContent { url, content_type, .. } = content;
                        content_type.as_ref().and_then(|mime| {
                            if mime.ty() == "image" {
                                url.as_ref().map(|url| Url::new(url.clone()))
                            } else {
                                None
                            }
                        })
                    })
                    .collect();
                let thumbnails = thumbnails.iter().map(|t| t.image.uri.as_str()).collect::<Vec<&str>>();
                if let Some(&thumbnail) = thumbnails.first() {
                    Some(thumbnail.to_owned())
                } else {
                    attached_images.first().map(|first_image| first_image.to_string())
                }
            })
            .next();
        if thumbnail_url.is_none() {
            thumbnail_url = html.as_deref().and_then(crate::util::thumbnail::extract_thumbnail);
        }

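        // Flatten every media object into enclosure rows. The first URL seen
        // per object is remembered so later variants can reference it as an
        // alternative.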
        let mut enclosures = media
            .into_iter()
            .flat_map(|media| {
                let MediaObject {
                    content,
                    mut title,
                    duration: object_duration,
                    mut description,
                    thumbnails,
                    ..
                } = media;
                if let Some(text) = description.as_mut() {
                    Self::sanitize_text(text, website);
                }
                if let Some(text) = title.as_mut() {
                    Self::sanitize_text(text, website);
                }

                let description = description.map(|t| t.content);
                let title = title.map(|t| t.content);
                let thumbnail_url = thumbnails.first().map(|thumb| thumb.image.uri.clone());

                let mut default_url = None;

                content
                    .into_iter()
                    .filter_map(|content| {
                        let MediaContent {
                            url,
                            content_type,
                            duration: content_duration,
                            width,
                            height,
                            size,
                            ..
                        } = content;
                        url.map(|url| {
                            let url = Url::new(url.clone());
                            // A duration on the content overrides the object-level duration.
                            let duration = if content_duration.is_some() { content_duration } else { object_duration };
                            let duration = duration.as_ref().map(Duration::as_secs).map(|secs| secs as i32);

                            let enclosure = Enclosure {
                                article_id: article_id.clone(),
                                url: url.clone(),
                                mime_type: content_type.as_ref().map(|mime| mime.to_string()),
                                title: title.clone(),
                                position: None,
                                summary: description.clone(),
                                thumbnail_url: thumbnail_url.clone(),
                                filesize: size.map(|s| s as i32),
                                width: width.map(|w| w as i32),
                                height: height.map(|h| h as i32),
                                duration,
                                framerate: None,
                                alternative: default_url.clone(),
                                is_default: false,
                            };

                            if default_url.is_none() {
                                default_url = Some(url);
                            }

                            enclosure
                        })
                    })
                    .collect::<Vec<Enclosure>>()
            })
            .collect::<Vec<Enclosure>>();

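        // Mark the highest-resolution variant as the default enclosure.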
        if let Some(largest_video) = enclosures.iter_mut().max_by_key(|enclosure| enclosure.height) {
            largest_video.is_default = true;
        }

        let date = published.or(updated).unwrap_or_else(Utc::now);

        // Only keep an `updated` timestamp that is newer than the article date.
        let updated = updated.filter(|updated| *updated > date);

        let article = FatArticle {
            article_id,
            feed_id: feed_id.clone(),
            title: title.map(|t| t.content),
            url: article_url,
            author: authors.iter().find(|person| !person.name.is_empty()).map(|person| person.name.clone()),
            date,
            synced: Utc::now(),
            updated,
            direction: None,
            marked,
            unread,
            html,
            scraped_content: None,
            summary,
            plain_text,
            thumbnail_url,
        };

        Some((article, enclosures))
    }

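    /// Downloads `url` and parses it as a feed. Returns `Ok(None)` when the
    /// download can be skipped: the cache-control window is still active, the
    /// server answered 304, or the etag is unchanged.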
    async fn download_and_parse_feed(
        url: &Url,
        etag: Option<&str>,
        ignore_until: Option<DateTime<Utc>>,
        client: &Client,
        header: Option<HeaderMap<HeaderValue>>,
    ) -> Result<Option<DownloadSuccess>, DownloadFailure> {
        if ignore_until.map(|dt| dt > Utc::now()).unwrap_or(false) {
            tracing::debug!(%url, ?ignore_until, "ignore download");
            return Ok(None);
        }

        let mut request_builder = client
            .get(url.as_str())
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/json"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/feed+json"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/rss+xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/atom+xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("text/xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("text/html"));

        if let Some(header) = header {
            request_builder = request_builder.headers(header);
        }

        let feed_response = match request_builder.send().await {
            Ok(response) => response,
            Err(error) => {
                tracing::error!(%url, %error, "Downloading feed failed");
                return Err(DownloadFailure {
                    msg: format!("Downloading feed failed: {url} - {error}"),
                    error: FeedApiError::Network(error),
                    retry_after: None,
                });
            }
        };

        let response_status = feed_response.status();

        if !response_status.is_success() {
            if response_status == reqwest::StatusCode::NOT_MODIFIED {
                return Ok(None);
            }

            tracing::error!(%url, %response_status, "Downloading feed failed");

            let retry_after = if response_status == reqwest::StatusCode::TOO_MANY_REQUESTS {
                feed_response
                    .headers()
                    .get(reqwest::header::RETRY_AFTER)
                    .and_then(|hv| hv.to_str().ok())
                    .and_then(|str| str.parse::<u64>().ok())
            } else {
                None
            };

            let error = if let Err(error) = feed_response.error_for_status() {
                FeedApiError::Network(error)
            } else {
                FeedApiError::Unknown
            };

            return Err(DownloadFailure {
                msg: format!("Downloading feed failed: {response_status} - {url}"),
                error,
                retry_after,
            });
        }

        let new_etag = feed_response
            .headers()
            .get(reqwest::header::ETAG)
            .and_then(|hv| hv.to_str().ok())
            .map(|etag| etag.to_string());

        let cache_control = feed_response
            .headers()
            .get(reqwest::header::CACHE_CONTROL)
            .and_then(|hv| hv.to_str().ok())
            .and_then(cache_control::CacheControl::from_header);

        if let (Some(old_etag), Some(new_etag)) = (etag, new_etag.as_deref())
            && old_etag == new_etag
        {
            return Ok(None);
        }

        let content_location = feed_response
            .headers()
            .get(reqwest::header::CONTENT_LOCATION)
            .and_then(|hv| hv.to_str().ok())
            .and_then(|cl| Url::parse(cl).ok());

        let result_bytes = match feed_response.bytes().await {
            Ok(result_bytes) => result_bytes,
            Err(error) => {
                tracing::error!(%url, %error, "Reading response body failed");
                return Err(DownloadFailure {
                    msg: format!("Reading response body failed: {url} - {error}"),
                    error: FeedApiError::Network(error),
                    retry_after: None,
                });
            }
        };

        let feed_base_url = content_location.unwrap_or_else(|| url.clone());

        let parser = ParserBuilder::new().base_uri(Some(feed_base_url)).sanitize_content(false).build();

        let parsed_feed = match parser.parse(result_bytes.as_ref()) {
            Ok(parsed_feed) => parsed_feed,
            Err(error) => {
                tracing::error!(%url, %error, "Couldn't parse feed content");
                return Err(DownloadFailure {
                    msg: format!("Couldn't parse feed content: {url}"),
                    error: FeedApiError::ParseFeed(FeedParserError::Feed),
                    retry_after: None,
                });
            }
        };

        Ok(Some(DownloadSuccess {
            feed: parsed_feed,
            etag: new_etag,
            cache_control,
        }))
    }

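    /// Extracts the feed's host name (minus common `www` prefixes) for
    /// grouping feeds by host when computing download delays.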
    fn feed_get_host_url(feed: &Feed) -> Option<(FeedID, String)> {
        let url = feed.feed_url.as_ref()?;
        let host = url.host_str()?;
        let host = host
            .strip_prefix("www.")
            .or_else(|| host.strip_prefix("www1."))
            .or_else(|| host.strip_prefix("www2."))
            .unwrap_or(host);
        Some((feed.feed_id.clone(), host.to_string()))
    }
}

#[async_trait]
impl FeedApi for LocalRSS {
    fn features(&self) -> FeedApiResult<PluginCapabilities> {
        Ok(PluginCapabilities::ADD_REMOVE_FEEDS
            | PluginCapabilities::SUPPORT_CATEGORIES
            | PluginCapabilities::SUPPORT_SUBCATEGORIES
            | PluginCapabilities::MODIFY_CATEGORIES
            | PluginCapabilities::SUPPORT_TAGS
            | PluginCapabilities::EDIT_FEED_URLS)
    }

    fn has_user_configured(&self) -> FeedApiResult<bool> {
        Ok(true)
    }

    async fn is_reachable(&self, _client: &Client) -> FeedApiResult<bool> {
        Err(FeedApiError::Unsupported)
    }

    async fn is_logged_in(&self, _client: &Client) -> FeedApiResult<bool> {
        Ok(true)
    }

    async fn user_name(&self) -> Option<String> {
        None
    }

    async fn get_login_data(&self) -> Option<LoginData> {
        Some(LoginData::None(LocalMetadata::get_id()))
    }

    async fn login(&mut self, _data: LoginData, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn logout(&mut self, _client: &Client) -> FeedApiResult<()> {
        self.cache.read().await.delete()?;
        Ok(())
    }

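    /// Downloads and parses all feeds concurrently, one tokio task per feed,
    /// gated by the portal's download semaphore and per-feed start delays.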
    async fn initial_sync(&self, client: &Client, custom_header: FeedHeaderMap) -> FeedApiResult<SyncResult> {
        let feeds = self.portal.get_feeds()?;
        let semaphore = self.portal.get_download_semaphore();

        let feed_hash_map = Arc::new(RwLock::new(
            feeds
                .clone()
                .into_iter()
                .map(|feed| (feed.feed_id.clone(), feed))
                .collect::<HashMap<FeedID, Feed>>(),
        ));

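        // Compute a start delay per feed: a general per-index offset, plus an
        // extra penalty for every additional feed on the same host (skipped
        // for blacklisted hosts).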
        let mut delays: HashMap<FeedID, Duration> = HashMap::new();
        let mut host_counts: HashMap<String, u64> = HashMap::new();
        for (i, (feed_id, host)) in feeds.iter().filter_map(Self::feed_get_host_url).enumerate() {
            let host_is_blacklisted = DELAY_HOST_BLACKLIST.contains(host.as_str());
            let count = host_counts.entry(host).or_default();
            *count += 1;

            let mut delay_ms = i as u64 * GENERAL_DELAY_MS;

            if *count > 1 && !host_is_blacklisted {
                let delay = *count - 1;
                delay_ms += delay * SAME_HOST_DELAY_MS;
                tracing::debug!(%feed_id, %delay_ms, "delaying request");
            }

            delays.insert(feed_id, Duration::from_millis(delay_ms));
        }

        let mut task_handles = Vec::new();

        for feed in feeds.into_iter() {
            let client = client.clone();
            let portal = self.portal.clone();
            let hash_map = feed_hash_map.clone();
            let semaphore = Arc::clone(&semaphore);
            let cache = self.cache.clone();
            let header = custom_header.get(&feed.feed_id).cloned();
            let delay = delays.get(&feed.feed_id).copied();

            task_handles.push(tokio::spawn(async move {
                let Some(url) = feed.feed_url.clone() else {
                    tracing::warn!(%feed.feed_id, "No feed url for feed");
                    if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
                        old_feed.error_count += 1;
                        old_feed.error_message = Some(format!("No feed url for feed: '{}'", feed.feed_id));
                    }
                    return Err(FeedApiError::ParseFeed(FeedParserError::NoUrl));
                };

                if let Some(delay) = delay {
                    tokio::time::sleep(delay).await;
                }

                let permit = match semaphore.acquire().await {
                    Ok(permit) => permit,
                    Err(error) => {
                        tracing::error!(%url, %error, "couldn't acquire download permit for feed");
                        return Err(error.into());
                    }
                };

                let old_etag = cache.read().await.get_etag(&feed.feed_id);
                let ignore_until = cache.read().await.get_cache_control(&feed.feed_id);
                let parsed_feed = match Self::download_and_parse_feed(&url, old_etag.as_deref(), ignore_until, &client, header).await {
                    Ok(Some(DownloadSuccess {
                        feed: parsed_feed,
                        etag,
                        cache_control,
                    })) => {
                        cache.write().await.set_etag(&feed.feed_id, etag);
                        cache
                            .write()
                            .await
                            .set_cache_control(&feed.feed_id, cache_control.and_then(|cc| cc.max_age).map(|max_age| max_age.as_secs()));
                        parsed_feed
                    }
                    Ok(None) => {
                        tracing::debug!(%feed.feed_id, "skipped downloading");
                        if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
                            old_feed.error_count = 0;
                            old_feed.error_message = None;
                        }
                        return Ok((Vec::new(), Vec::new()));
                    }
                    Err(DownloadFailure { msg, error, retry_after }) => {
                        cache.write().await.set_cache_control(&feed.feed_id, retry_after);

                        if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
                            old_feed.error_count += 1;
                            old_feed.error_message = Some(msg);
                        }

                        cache.write().await.set_etag(&feed.feed_id, None);

                        return Err(error);
                    }
                };

                drop(permit);

                let updated_feed = Feed::from_feed_rs(&parsed_feed, None, &url);
                if let Some(old_feed) = hash_map.write().await.get_mut(&feed.feed_id) {
                    old_feed.icon_url = updated_feed.icon_url;
                    old_feed.website = updated_feed.website;
                    old_feed.error_count = 0;
                    old_feed.error_message = None;
                }

                let conversion_result = parsed_feed
                    .entries
                    .into_iter()
                    .rev()
                    .filter_map(|e| Self::convert_entry(e, &feed.feed_id, feed.website.as_ref(), portal.clone()))
                    .collect::<Vec<_>>();

                let (articles, enclosures): (Vec<FatArticle>, Vec<Vec<Enclosure>>) = conversion_result.into_iter().unzip();
                let enclosures = enclosures.into_iter().flatten().collect();

                Ok((articles, enclosures))
            }));
        }

        let mut articles: Vec<FatArticle> = Vec::new();
        let mut enclosures: Vec<Enclosure> = Vec::new();

        let result_vec = futures::future::join_all(task_handles).await;
        _ = self.cache.write().await.write();

        for (mut feed_articles, mut feed_enclosures) in result_vec.into_iter().flatten().flatten() {
            articles.append(&mut feed_articles);
            enclosures.append(&mut feed_enclosures);
        }

        let feeds = Arc::into_inner(feed_hash_map)
            .map(|lock| lock.into_inner())
            .map(|map| map.into_values().collect())
            .and_then(util::vec_to_option);

        Ok(SyncResult {
            feeds,
            categories: None,
            feed_mappings: None,
            category_mappings: None,
            tags: None,
            headlines: None,
            articles: util::vec_to_option(articles),
            enclosures: util::vec_to_option(enclosures),
            taggings: None,
        })
    }

    async fn sync(&self, client: &Client, custom_header: FeedHeaderMap) -> FeedApiResult<SyncResult> {
        self.initial_sync(client, custom_header).await
    }

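    /// Refreshes a single feed, ignoring the stored etag and cache-control
    /// window so the download always happens.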
    async fn fetch_feed(&self, feed_id: &FeedID, client: &Client, custom_header: HeaderMap<HeaderValue>) -> FeedApiResult<FeedUpdateResult> {
        let feeds = self.portal.get_feeds()?;
        let mut feed = feeds.into_iter().find(|f| &f.feed_id == feed_id).ok_or(FeedApiError::Unknown)?;

        let url = feed.feed_url.clone().ok_or(FeedApiError::Unknown)?;

        let parsed_feed = match Self::download_and_parse_feed(&url, None, None, client, Some(custom_header)).await {
            Ok(Some(DownloadSuccess {
                feed: parsed_feed,
                etag,
                cache_control,
            })) => {
                self.cache.write().await.set_etag(&feed.feed_id, etag);
                self.cache
                    .write()
                    .await
                    .set_cache_control(&feed.feed_id, cache_control.and_then(|cc| cc.max_age).map(|max_age| max_age.as_secs()));
                parsed_feed
            }
            Ok(None) => {
                return Ok(FeedUpdateResult {
                    feed: None,
                    articles: None,
                    enclosures: None,
                    taggings: None,
                });
            }
            Err(DownloadFailure { msg: _, error, retry_after }) => {
                self.cache.write().await.set_cache_control(&feed.feed_id, retry_after);
                return Err(error);
            }
        };

        _ = self.cache.write().await.write();

        let updated_feed = Feed::from_feed_rs(&parsed_feed, None, &url);
        feed.icon_url = updated_feed.icon_url;
        feed.website = updated_feed.website;
        feed.error_count = 0;
        feed.error_message = None;

        let conversion_result = parsed_feed
            .entries
            .into_iter()
            .rev()
            .filter_map(|e| Self::convert_entry(e, &feed.feed_id, feed.website.as_ref(), self.portal.clone()))
            .collect::<Vec<_>>();

        let (articles, enclosures): (Vec<FatArticle>, Vec<Vec<Enclosure>>) = conversion_result.into_iter().unzip();
        let enclosures = enclosures.into_iter().flatten().collect();

        Ok(FeedUpdateResult {
            feed: Some(feed),
            articles: util::vec_to_option(articles),
            enclosures: util::vec_to_option(enclosures),
            taggings: None,
        })
    }

    async fn set_article_read(&self, _articles: &[ArticleID], _read: models::Read, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn set_article_marked(&self, _articles: &[ArticleID], _marked: models::Marked, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn set_feed_read(&self, _feeds: &[FeedID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn set_category_read(&self, _categories: &[CategoryID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn set_tag_read(&self, _tags: &[TagID], _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn set_all_read(&self, _articles: &[ArticleID], _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

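    /// Downloads and parses the feed once to build the `Feed` model; since the
    /// backend is local, nothing needs to be registered remotely.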
    async fn add_feed(
        &self,
        url: &Url,
        title: Option<String>,
        category_id: Option<CategoryID>,
        client: &Client,
    ) -> FeedApiResult<(Feed, Option<Category>)> {
        let feed_response = client
            .get(url.as_str())
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/json"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/feed+json"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/rss+xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("application/atom+xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("text/xml"))
            .header(reqwest::header::ACCEPT, HeaderValue::from_static("text/html"))
            .send()
            .await?
            .error_for_status()?;

        let result_bytes = feed_response
            .bytes()
            .await
            .inspect_err(|error| tracing::error!(%url, %error, "Reading response as bytes failed"))?;

        let parser = ParserBuilder::new().base_uri(Some(url)).build();
        let feed = parser.parse(result_bytes.as_ref())?;
        let feed = Feed::from_feed_rs(&feed, title, url);

        let categories = self.portal.get_categories()?;
        let category = categories.iter().find(|c| Some(&c.category_id) == category_id.as_ref()).cloned();

        Ok((feed, category))
    }

    async fn remove_feed(&self, id: &FeedID, _client: &Client) -> FeedApiResult<()> {
        let mut cache = self.cache.write().await;
        cache.set_etag(id, None);
        cache.set_cache_control(id, None);
        _ = cache.write();
        Ok(())
    }

    async fn move_feed(&self, _feed_id: &FeedID, _from: &CategoryID, _to: &CategoryID, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn rename_feed(&self, feed_id: &FeedID, _new_title: &str, _client: &Client) -> FeedApiResult<FeedID> {
        Ok(feed_id.clone())
    }

    async fn edit_feed_url(&self, _feed_id: &FeedID, _new_url: &str, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn add_category<'a>(&self, _title: &str, _parent: Option<&'a CategoryID>, _client: &Client) -> FeedApiResult<CategoryID> {
        let uuid = uuid::Uuid::new_v4();
        Ok(CategoryID::from_owned(uuid.to_string()))
    }

    async fn remove_category(&self, id: &CategoryID, _remove_children: bool, _client: &Client) -> FeedApiResult<()> {
        let mappings = self.portal.get_feed_mappings()?;
        let mut cache = self.cache.write().await;
        for mapping in mappings {
            if &mapping.category_id == id {
                cache.set_etag(&mapping.feed_id, None);
                cache.set_cache_control(&mapping.feed_id, None);
            }
        }

        _ = cache.write();

        Ok(())
    }

    async fn rename_category(&self, id: &CategoryID, _new_title: &str, _client: &Client) -> FeedApiResult<CategoryID> {
        Ok(id.clone())
    }

    async fn move_category(&self, _id: &CategoryID, _parent: &CategoryID, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn import_opml(&self, _opml: &str, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn add_tag(&self, title: &str, _client: &Client) -> FeedApiResult<TagID> {
        Ok(TagID::new(title))
    }

    async fn remove_tag(&self, _id: &TagID, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn rename_tag(&self, id: &TagID, _new_title: &str, _client: &Client) -> FeedApiResult<TagID> {
        Ok(id.clone())
    }

    async fn tag_article(&self, _article_id: &ArticleID, _tag_id: &TagID, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

    async fn untag_article(&self, _article_id: &ArticleID, _tag_id: &TagID, _client: &Client) -> FeedApiResult<()> {
        Ok(())
    }

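    /// Resolves the favicon by re-downloading the feed (when possible) to pick
    /// up its icon URL; only the source URL is filled in, no image data.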
    async fn get_favicon(&self, feed_id: &FeedID, client: &Client, custom_header: HeaderMap<HeaderValue>) -> FeedApiResult<FavIcon> {
        let Some(feed) = self.portal.get_feeds()?.into_iter().find(|f| &f.feed_id == feed_id) else {
            return Err(FeedApiError::Unknown);
        };

        if let Some(feed_url) = feed.feed_url.as_ref() {
            let old_etag = self.cache.read().await.get_etag(feed_id);
            let ignore_until = self.cache.read().await.get_cache_control(feed_id);
            let feed = Self::download_and_parse_feed(feed_url, old_etag.as_deref(), ignore_until, client, Some(custom_header))
                .await
                .map_err(
                    |DownloadFailure {
                         msg: _,
                         error,
                         retry_after: _,
                     }| error,
                )?
                .map(
                    |DownloadSuccess {
                         feed: feed_rs,
                         etag: _,
                         cache_control: _,
                     }| Feed::from_feed_rs(&feed_rs, None, feed_url),
                )
                .unwrap_or(feed);
            _ = self.cache.write().await.write();

            Ok(FavIcon {
                feed_id: feed_id.clone(),
                expires: Utc::now() + TimeDelta::try_days(EXPIRES_AFTER_DAYS).unwrap(),
                format: None,
                etag: None,
                source_url: feed.icon_url,
                data: None,
            })
        } else if let Some(icon_url) = feed.icon_url {
            Ok(FavIcon {
                feed_id: feed_id.clone(),
                expires: Utc::now() + TimeDelta::try_days(EXPIRES_AFTER_DAYS).unwrap(),
                format: None,
                etag: None,
                source_url: Some(icon_url),
                data: None,
            })
        } else {
            Err(FeedApiError::Unknown)
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::models::Url;
    use test_log::test;

    #[test]
    fn clean_html_entity() {
        let text = "Tired of Google&#8217;s Tracking? Motorola&#8217;s GrapheneOS-Powered Phones Are Coming";
        let is_html = super::HTML_ENTITY_REGEX.is_match(text);
        assert!(is_html);
        let res = super::LocalRSS::sanitize_string(text, None);
        assert_eq!(res, "Tired of Google’s Tracking? Motorola’s GrapheneOS-Powered Phones Are Coming");
    }

    #[test]
    fn clean_twig233() {
        let html = r#"
            <blockquote>
                <p>Exhibit gets animated!</p>
                <p>The latest release adds animation playback and armature visualization, making it easier to preview rigged 3D models directly in GNOME. All thanks to F3D’s latest improvements.</p>
                <p>Get it on <a href="https://flathub.org/apps/io.github.nokse22.Exhibit">Flathub</a></p>
                <p>Checkout <a href="https://f3d.app">F3D</a></p>
                <p><img width="924" height="611" loading="lazy" decoding="async" src="/_astro/exhibit.yGW-D87q_oC986.webp" ></p>
                <p></p>
                <p><video controls><source src="/posts/2026/01/twig-233/exhibit.mp4" type="video/mp4"></video></p>
            </blockquote>
        "#;

        let url = Url::parse("https://thisweek.gnome.org/").unwrap();
        let res = super::LocalRSS::sanitize_string(html, Some(&url));
        assert!(res.contains("<video><source src="));
    }

    #[test]
    fn empty_table_cell() {
        let html = r#"
            <table>
                <tr>
                    <th>Company</th>
                    <th></th>
                    <th>Country</th>
                </tr>
                <tr>
                    <td>Alfreds Futterkiste</td>
                    <td>Maria Anders</td>
                    <td>Germany</td>
                </tr>
                <tr>
                    <td>Centro comercial Moctezuma</td>
                    <td>Francisco Chang</td>
                    <td>Mexico</td>
                </tr>
            </table>
        "#;

        let res = super::LocalRSS::sanitize_string(html, None);
        assert!(res.contains("<th></th>"));
    }
}