#[cfg(test)]
mod tests;
use crate::models::{Category, CategoryID, CategoryMapping, Feed, FeedID, FeedMapping, NEWSFLASH_TOPLEVEL, Url};
use crate::util::feed_parser::{self, ParsedUrl};
pub use opml::Error as OpmlError;
use opml::{Head, OPML, Outline};
use reqwest::Client;
use std::collections::HashSet;
use std::str;
use std::sync::Arc;
use tokio::sync::Semaphore;
/// Everything extracted from an OPML document by [`parse_opml`].
pub struct OpmlResult {
/// One category per outline that had no `xmlUrl` attribute.
pub categories: Vec<Category>,
/// Feeds found in the document, possibly replaced by freshly downloaded feed data.
pub feeds: Vec<Feed>,
/// Assignment of feeds to their enclosing category. Feeds located directly at the
/// top level of the document have no mapping.
pub feed_mappings: Vec<FeedMapping>,
/// Parent/child relation for every category, including those directly below the top level.
pub category_mappings: Vec<CategoryMapping>,
}
/// Serializes the given categories and feeds into an OPML document string.
///
/// The category tree is written first, starting at [`NEWSFLASH_TOPLEVEL`] (see
/// [`write_categories`]). Afterwards every feed that does not appear in `feed_mappings`
/// at all is appended as a top-level outline. Feeds without a `feed_url` are skipped,
/// since an outline without `xmlUrl` could not be told apart from a category.
///
/// # Errors
///
/// Returns an [`OpmlError`] if the document cannot be serialized to XML.
pub fn generate_opml(
    categories: &[Category],
    category_mappings: &[CategoryMapping],
    feeds: &[Feed],
    feed_mappings: &[FeedMapping],
) -> Result<String, OpmlError> {
    let mut document = opml::OPML {
        head: Some(Head {
            title: Some("NewsFlash OPML export".into()),
            ..Head::default()
        }),
        ..Default::default()
    };

    write_categories(
        categories,
        category_mappings,
        feeds,
        feed_mappings,
        &NEWSFLASH_TOPLEVEL,
        &mut document.body.outlines,
    );

    // Feeds without any mapping were not reached by the tree walk above.
    let mapped_feed_ids = feed_mappings.iter().map(|mapping| &mapping.feed_id).collect::<HashSet<_>>();
    let uncategorized_outlines = feeds
        .iter()
        .filter(|feed| !mapped_feed_ids.contains(&feed.feed_id))
        .filter_map(|feed| {
            let xml_url = feed.feed_url.as_ref()?;
            Some(Outline {
                text: feed.label.clone(),
                title: Some(feed.label.clone()),
                r#type: Some("rss".into()),
                xml_url: Some(xml_url.to_string()),
                html_url: feed.website.as_ref().map(|url| url.to_string()),
                ..Outline::default()
            })
        });
    document.body.outlines.extend(uncategorized_outlines);

    let xml_string = document.to_string()?;
    Ok(xml_string)
}
/// Appends the direct children of `parent_id` to `outlines`, recursing into sub-categories.
///
/// Child categories come first, in the order they appear in `categories`, each with its
/// own subtree nested inside. They are followed by the feeds mapped to `parent_id`, in the
/// order they appear in `feeds`. Feeds without a `feed_url` are skipped.
///
/// NOTE: there is no cycle detection. A `category_mappings` set in which a category is
/// (transitively) its own parent makes this function recurse without bound.
pub fn write_categories(
    categories: &[Category],
    category_mappings: &[CategoryMapping],
    feeds: &[Feed],
    feed_mappings: &[FeedMapping],
    parent_id: &CategoryID,
    outlines: &mut Vec<Outline>,
) {
    let child_category_ids: HashSet<&CategoryID> = category_mappings
        .iter()
        .filter(|mapping| &mapping.parent_id == parent_id)
        .map(|mapping| &mapping.category_id)
        .collect();

    for category in categories.iter().filter(|category| child_category_ids.contains(&category.category_id)) {
        let mut category_outline = Outline {
            title: Some(category.label.clone()),
            text: category.label.clone(),
            ..Outline::default()
        };
        write_categories(
            categories,
            category_mappings,
            feeds,
            feed_mappings,
            &category.category_id,
            &mut category_outline.outlines,
        );
        outlines.push(category_outline);
    }

    // A set keeps the membership test below O(1) per feed instead of scanning all mappings.
    let child_feed_ids: HashSet<&FeedID> = feed_mappings
        .iter()
        .filter(|mapping| &mapping.category_id == parent_id)
        .map(|mapping| &mapping.feed_id)
        .collect();

    let feed_outlines = feeds
        .iter()
        .filter(|feed| child_feed_ids.contains(&feed.feed_id))
        .filter_map(|feed| {
            let xml_url = feed.feed_url.as_ref()?;
            Some(Outline {
                text: feed.label.clone(),
                title: Some(feed.label.clone()),
                r#type: Some("rss".into()),
                xml_url: Some(xml_url.to_string()),
                html_url: feed.website.as_ref().map(|url| url.to_string()),
                ..Outline::default()
            })
        });
    outlines.extend(feed_outlines);
}
pub async fn parse_opml(opml_string: &str, parse_all_feeds: bool, semaphore: Arc<Semaphore>, client: &Client) -> Result<OpmlResult, OpmlError> {
let opml = OPML::from_str(opml_string)?;
let mut category_vec: Vec<Category> = Vec::new();
let mut category_mapping_vec: Vec<CategoryMapping> = Vec::new();
let mut feed_vec: Vec<Feed> = Vec::new();
let mut feed_mapping_vec: Vec<FeedMapping> = Vec::new();
let mut sort_index = 0;
parse_outlines(
&opml.body.outlines,
&NEWSFLASH_TOPLEVEL,
&mut sort_index,
&mut category_vec,
&mut category_mapping_vec,
&mut feed_vec,
&mut feed_mapping_vec,
);
let mut task_handles = Vec::new();
for feed in feed_vec.into_iter().filter(|feed| feed.feed_url.is_some()) {
let semaphore = semaphore.clone();
let client = client.clone();
task_handles.push(tokio::spawn(async move {
if parse_all_feeds || feed.website.is_none() {
let xml_url = feed.feed_url.clone().unwrap();
match feed_parser::download_and_parse_feed(&xml_url, &feed.feed_id, Some(feed.label.clone()), semaphore, &client).await {
Ok(ParsedUrl::SingleFeed(parsed_feed)) => *parsed_feed,
Ok(ParsedUrl::MultipleFeeds(_)) => {
tracing::warn!(%xml_url, "Parsing feed resulted in multiple available feeds");
feed
}
Err(error) => {
tracing::warn!(%xml_url, %error, "Parsing feed failed, falling back to data from opml");
feed
}
}
} else {
feed
}
}));
}
let parsed_feeds = futures::future::join_all(task_handles).await.into_iter().flatten().collect::<Vec<_>>();
Ok(OpmlResult {
categories: category_vec,
feeds: parsed_feeds,
feed_mappings: feed_mapping_vec,
category_mappings: category_mapping_vec,
})
}
/// Recursively converts `outlines` into feeds, categories and their mappings.
///
/// An outline with an `xml_url` is a feed, provided its `type` is absent, `"rss"` or
/// `"atom"`; any other type is logged and skipped. The feed is recorded even when the URL
/// does not parse, in which case its `feed_url` is `None`. Feeds directly below
/// [`NEWSFLASH_TOPLEVEL`] get no [`FeedMapping`].
///
/// An outline without an `xml_url` is a category; its children are parsed with the new
/// category as their parent. The category id is derived from the label, so equally named
/// categories share an id.
///
/// `sort_index` is a single counter shared across the whole tree and is advanced once per
/// visited outline, including skipped ones.
// The accumulators are threaded through the recursion individually, hence the many arguments.
#[allow(clippy::too_many_arguments)]
fn parse_outlines(
    outlines: &[Outline],
    category_id: &CategoryID,
    sort_index: &mut i32,
    category_vec: &mut Vec<Category>,
    category_mapping_vec: &mut Vec<CategoryMapping>,
    feed_vec: &mut Vec<Feed>,
    feed_mapping_vec: &mut Vec<FeedMapping>,
) {
    for outline in outlines {
        *sort_index += 1;

        if let Some(xml_url) = &outline.xml_url {
            let is_feed_type = matches!(outline.r#type.as_deref(), None | Some("rss" | "atom"));
            if !is_feed_type {
                tracing::warn!("invalid feed outline");
                continue;
            }

            let feed_id = FeedID::new(xml_url);

            if category_id != &*NEWSFLASH_TOPLEVEL {
                feed_mapping_vec.push(FeedMapping {
                    feed_id: feed_id.clone(),
                    category_id: category_id.clone(),
                    sort_index: Some(*sort_index),
                });
            }

            feed_vec.push(Feed {
                feed_id,
                label: outline_label(outline),
                website: outline.html_url.as_ref().and_then(|url| Url::parse(url).ok()),
                feed_url: Url::parse(xml_url).ok(),
                icon_url: None,
                error_count: 0,
                error_message: None,
            });
        } else {
            let label = outline_label(outline);
            let new_category_id = CategoryID::new(&label);

            category_vec.push(Category {
                category_id: new_category_id.clone(),
                label,
            });
            category_mapping_vec.push(CategoryMapping {
                parent_id: category_id.clone(),
                category_id: new_category_id.clone(),
                sort_index: Some(*sort_index),
            });

            parse_outlines(
                &outline.outlines,
                &new_category_id,
                sort_index,
                category_vec,
                category_mapping_vec,
                feed_vec,
                feed_mapping_vec,
            );
        }
    }
}

/// Picks the display label for an outline: the `title` attribute, else `text`, else
/// `"No Title"`. Empty strings count as missing so a label is never empty.
fn outline_label(outline: &Outline) -> String {
    let title = outline.title.as_deref().filter(|title| !title.is_empty());
    let text = Some(outline.text.as_str()).filter(|text| !text.is_empty());
    title.or(text).unwrap_or("No Title").to_owned()
}