news-flash 3.0.0

Base library for a modern feed reader
Documentation
// The `tests` submodule lives in a separate file and is only compiled for test builds.
#[cfg(test)]
mod tests;

use crate::models::{Category, CategoryID, CategoryMapping, Feed, FeedID, FeedMapping, NEWSFLASH_TOPLEVEL, Url};
use crate::util::feed_parser::{self, ParsedUrl};
pub use opml::Error as OpmlError;
use opml::{Head, OPML, Outline};
use reqwest::Client;
use std::collections::HashSet;
use std::str;
use std::sync::Arc;
use tokio::sync::Semaphore;

/// Everything extracted from an OPML document by [`parse_opml`].
pub struct OpmlResult {
    /// Categories created from outlines that carry no feed url (`xml_url`).
    pub categories: Vec<Category>,
    /// Feeds found in the document, either as described by the OPML itself
    /// or as returned by downloading and parsing the feed.
    pub feeds: Vec<Feed>,
    /// Assignment of feeds to the category they were nested in.
    /// Feeds listed directly at the top level get no mapping.
    pub feed_mappings: Vec<FeedMapping>,
    /// Parent/child relation of every category; categories at the top level
    /// have `NEWSFLASH_TOPLEVEL` as their parent.
    pub category_mappings: Vec<CategoryMapping>,
}

pub fn generate_opml(
    categories: &[Category],
    category_mappings: &[CategoryMapping],
    feeds: &[Feed],
    feed_mappings: &[FeedMapping],
) -> Result<String, OpmlError> {
    let mut opml = opml::OPML {
        head: Some(Head {
            title: Some("NewsFlash OPML export".into()),
            ..Head::default()
        }),
        ..Default::default()
    };

    write_categories(
        categories,
        category_mappings,
        feeds,
        feed_mappings,
        &NEWSFLASH_TOPLEVEL.clone(),
        &mut opml.body.outlines,
    );

    let mapped_feed_ids = feed_mappings.iter().map(|fm| &fm.feed_id).collect::<HashSet<_>>();
    let uncategorized_feeds = feeds.iter().filter(|f| !mapped_feed_ids.contains(&f.feed_id)).collect::<Vec<_>>();

    for feed in uncategorized_feeds {
        if let Some(xml_url) = &feed.feed_url {
            opml.body.outlines.push(Outline {
                text: feed.label.clone(),
                title: Some(feed.label.clone()),
                r#type: Some("rss".into()),
                xml_url: Some(xml_url.to_string()),
                html_url: feed.website.as_ref().map(|url| url.to_string()),
                ..Outline::default()
            });
        }
    }

    let xml_string = opml.to_string()?;
    Ok(xml_string)
}

/// Appends the children of `parent_id` to `outlines`.
///
/// Every category mapped to `parent_id` becomes an outline whose own children
/// are filled in recursively. After the categories, every feed mapped to
/// `parent_id` is appended as an outline of type `"rss"`.
/// Feeds without a `feed_url` are skipped.
///
/// Categories and feeds are emitted in the order in which they appear in
/// `categories` and `feeds`.
pub fn write_categories(
    categories: &[Category],
    category_mappings: &[CategoryMapping],
    feeds: &[Feed],
    feed_mappings: &[FeedMapping],
    parent_id: &CategoryID,
    outlines: &mut Vec<Outline>,
) {
    let child_category_ids: HashSet<&CategoryID> = category_mappings
        .iter()
        .filter(|mapping| &mapping.parent_id == parent_id)
        .map(|mapping| &mapping.category_id)
        .collect();

    for category in categories.iter().filter(|category| child_category_ids.contains(&category.category_id)) {
        let mut category_outline = Outline {
            title: Some(category.label.clone()),
            text: category.label.clone(),
            ..Outline::default()
        };

        write_categories(
            categories,
            category_mappings,
            feeds,
            feed_mappings,
            &category.category_id,
            &mut category_outline.outlines,
        );
        outlines.push(category_outline);
    }

    // A set keeps the membership test below constant-time per feed.
    let child_feed_ids: HashSet<&FeedID> = feed_mappings
        .iter()
        .filter(|mapping| &mapping.category_id == parent_id)
        .map(|mapping| &mapping.feed_id)
        .collect();

    for feed in feeds.iter().filter(|feed| child_feed_ids.contains(&feed.feed_id)) {
        let Some(xml_url) = &feed.feed_url else {
            continue;
        };

        outlines.push(Outline {
            text: feed.label.clone(),
            title: Some(feed.label.clone()),
            r#type: Some("rss".into()),
            xml_url: Some(xml_url.to_string()),
            html_url: feed.website.as_ref().map(|url| url.to_string()),
            ..Outline::default()
        });
    }
}

pub async fn parse_opml(opml_string: &str, parse_all_feeds: bool, semaphore: Arc<Semaphore>, client: &Client) -> Result<OpmlResult, OpmlError> {
    let opml = OPML::from_str(opml_string)?;
    let mut category_vec: Vec<Category> = Vec::new();
    let mut category_mapping_vec: Vec<CategoryMapping> = Vec::new();
    let mut feed_vec: Vec<Feed> = Vec::new();
    let mut feed_mapping_vec: Vec<FeedMapping> = Vec::new();
    let mut sort_index = 0;

    parse_outlines(
        &opml.body.outlines,
        &NEWSFLASH_TOPLEVEL,
        &mut sort_index,
        &mut category_vec,
        &mut category_mapping_vec,
        &mut feed_vec,
        &mut feed_mapping_vec,
    );

    let mut task_handles = Vec::new();

    for feed in feed_vec.into_iter().filter(|feed| feed.feed_url.is_some()) {
        let semaphore = semaphore.clone();
        let client = client.clone();

        task_handles.push(tokio::spawn(async move {
            if parse_all_feeds || feed.website.is_none() {
                let xml_url = feed.feed_url.clone().unwrap();

                match feed_parser::download_and_parse_feed(&xml_url, &feed.feed_id, Some(feed.label.clone()), semaphore, &client).await {
                    Ok(ParsedUrl::SingleFeed(parsed_feed)) => *parsed_feed,
                    Ok(ParsedUrl::MultipleFeeds(_)) => {
                        tracing::warn!(%xml_url, "Parsing feed resulted in multiple available feeds");
                        feed
                    }
                    Err(error) => {
                        tracing::warn!(%xml_url, %error, "Parsing feed failed, falling back to data from opml");
                        feed
                    }
                }
            } else {
                feed
            }
        }));
    }

    let parsed_feeds = futures::future::join_all(task_handles).await.into_iter().flatten().collect::<Vec<_>>();

    Ok(OpmlResult {
        categories: category_vec,
        feeds: parsed_feeds,
        feed_mappings: feed_mapping_vec,
        category_mappings: category_mapping_vec,
    })
}

/// Recursively converts OPML outlines into feeds, categories and mappings.
///
/// An outline with an `xml_url` is treated as a feed, provided its type is
/// absent, `"rss"` or `"atom"`; feeds with any other type are skipped with a
/// warning. An outline without `xml_url` becomes a category (its ID derived
/// from its label) and its children are processed with that category as parent.
///
/// `sort_index` is incremented for every outline visited, at any nesting
/// depth, and stored in the mappings. Feeds directly below
/// `NEWSFLASH_TOPLEVEL` get no feed mapping.
// The four output vectors are filled in a single pass, hence the long parameter list.
#[allow(clippy::too_many_arguments)]
fn parse_outlines(
    outlines: &[Outline],
    category_id: &CategoryID,
    sort_index: &mut i32,
    category_vec: &mut Vec<Category>,
    category_mapping_vec: &mut Vec<CategoryMapping>,
    feed_vec: &mut Vec<Feed>,
    feed_mapping_vec: &mut Vec<FeedMapping>,
) {
    for outline in outlines {
        *sort_index += 1;

        match &outline.xml_url {
            // feed
            Some(xml_url) if matches!(outline.r#type.as_deref(), None | Some("rss" | "atom")) => {
                let feed_id = FeedID::new(xml_url);

                if category_id != &*NEWSFLASH_TOPLEVEL {
                    feed_mapping_vec.push(FeedMapping {
                        feed_id: feed_id.clone(),
                        category_id: category_id.clone(),
                        sort_index: Some(*sort_index),
                    });
                }

                feed_vec.push(Feed {
                    feed_id,
                    label: outline_label(outline),
                    website: outline.html_url.as_ref().and_then(|url| Url::parse(url).ok()),
                    // An unparsable url is kept as `None`; the caller decides what to do with it.
                    feed_url: Url::parse(xml_url).ok(),
                    icon_url: None,
                    error_count: 0,
                    error_message: None,
                });
            }
            Some(_) => tracing::warn!("invalid feed outline"),
            // category
            None => {
                let label = outline_label(outline);
                let new_category_id = CategoryID::new(&label);

                category_vec.push(Category {
                    category_id: new_category_id.clone(),
                    label,
                });
                category_mapping_vec.push(CategoryMapping {
                    parent_id: category_id.clone(),
                    category_id: new_category_id.clone(),
                    sort_index: Some(*sort_index),
                });

                parse_outlines(
                    &outline.outlines,
                    &new_category_id,
                    sort_index,
                    category_vec,
                    category_mapping_vec,
                    feed_vec,
                    feed_mapping_vec,
                );
            }
        }
    }
}

/// Picks the label for an outline.
///
/// Prefers the optional `title` attribute and falls back to the mandatory
/// `text` attribute. Empty values are skipped; `"No Title"` is used when
/// neither attribute holds any text.
fn outline_label(outline: &Outline) -> String {
    let title = outline.title.as_deref().filter(|title| !title.is_empty());
    let text = Some(outline.text.as_str()).filter(|text| !text.is_empty());

    title.or(text).unwrap_or("No Title").to_owned()
}