//! bwsr 0.1.0
//!
//! Basic web syndication reader: read RSS/Atom feeds and output simplified entry data.
use chrono::{DateTime, Days, Utc};
use clap::Parser;
use feed_rs::model::Text;
use feed_rs::parser::parse;
use rayon::prelude::*;
use std::cmp::Reverse;
use std::error::Error;
use std::io;
use std::io::{BufRead, BufReader, Read};
use std::process::exit;

/// A single entry extracted from an RSS/Atom feed, flattened for printing.
#[derive(Debug, Clone)]
struct FeedEntry {
    // Title of the feed (channel) the entry came from.
    feed_title: String,
    // Title of the individual entry.
    entry_title: String,
    // Publication date of the entry, in UTC.
    entry_date: DateTime<Utc>,
    // Link (href) of the entry.
    entry_url: String,
}

/// bwsr: basic web syndication reader
// NOTE: the doc comment above is surfaced by clap as the `about` help text;
// it previously misspelled the program name as "bswr".
#[derive(Parser)]
#[command(version, about, long_about = None)]
struct Args {
    /// Maximum age of the entries to read in days
    #[arg(short, long, default_value = "2")]
    age_days: u64,
    /// Feed entry field delimiter for printed output
    #[arg(short, long, default_value = "|")]
    delim: String,
}

/// Returns vector with each line read from the input stream, ignores empty or comment ('#') lines.
/// Reads the input stream line by line and returns the trimmed, non-empty,
/// non-comment lines.
///
/// A line is a comment when its first non-whitespace character is `'#'`.
/// Lines that fail to read (e.g. invalid UTF-8) are skipped silently,
/// matching the original best-effort behavior.
fn read_input_stream<R: Read>(input_stream: R) -> Vec<String> {
    BufReader::new(input_stream)
        .lines()
        .filter_map(Result::ok)
        // Trim BEFORE filtering so indented comments ("  # x") and
        // whitespace-only lines are excluded too.
        .map(|line| line.trim().to_string())
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .collect()
}

/// Returns current date offset by the specified days.
fn get_filter_date(offset_days: u64) -> DateTime<Utc> {
    Utc::now().checked_sub_days(Days::new(offset_days)).unwrap()
}

/// Returns plain Text struct with provided string as content.
fn make_default_text(text_content: &str) -> Text {
    Text {
        content_type: mime::TEXT_PLAIN,
        src: None,
        content: text_content.to_string(),
    }
}

/// Returns contents of the provided RSS/Atom feed URL.
fn get_feed_content(feed_url: &str) -> Result<String, Box<dyn Error>> {
    let mut res = reqwest::blocking::get(feed_url)?;
    let mut content = String::new();
    res.read_to_string(&mut content)?;
    Ok(content)
}

/// Returns vector with parsed entries from the RSS/Atom feed content string provided,
/// filtering out entries older than the specified date.
/// Downloads and parses the RSS/Atom feed at `feed_url`, returning its entries.
///
/// Entries with no publication date, or published before `filter_date`, are
/// dropped. Returns `None` (after printing a warning to stderr) when the feed
/// cannot be fetched or parsed.
fn parse_feed(feed_url: &str, filter_date: DateTime<Utc>) -> Option<Vec<FeedEntry>> {
    let Ok(feed_content) = get_feed_content(feed_url) else {
        eprintln!("bwsr: warning: feedparser: unable to get feed > {feed_url}");
        return None;
    };

    let Ok(feed) = parse(feed_content.as_bytes()) else {
        eprintln!("bwsr: warning: unable to parse feed > {feed_url}");
        return None;
    };

    // `unwrap_or_else` so the fallback Text is only built when no title exists.
    let feed_title = feed
        .title
        .unwrap_or_else(|| make_default_text("Unknown Channel"))
        .content;
    let entries = feed
        .entries
        .iter()
        // `filter_map` reads the publication date once instead of the
        // `is_some()` + `unwrap()` pair, and drops undated/too-old entries.
        .filter_map(|e| {
            let published = e.published.filter(|d| *d >= filter_date)?;
            Some(FeedEntry {
                feed_title: feed_title.clone(),
                entry_title: e
                    .title
                    .clone()
                    .unwrap_or_else(|| make_default_text("Unknown Title"))
                    .content,
                entry_date: published,
                // Some feeds ship entries without links; fall back to an
                // empty URL instead of panicking on `links[0]`.
                entry_url: e.links.first().map_or_else(String::new, |l| l.href.clone()),
            })
        })
        .collect::<Vec<FeedEntry>>();
    Some(entries)
}

fn main() {
    let args = Args::parse();
    let delim = args.delim;
    let offset_days = args.age_days;
    let filter_date = get_filter_date(offset_days);

    let url_list = if atty::isnt(atty::Stream::Stdin) {
        read_input_stream(io::stdin())
    } else {
        eprintln!("bwsr: error: stdin buffer is empty, no url list provided");
        exit(1);
    };

    let mut entries = url_list
        .par_iter()
        .map(|url| parse_feed(url, filter_date))
        .filter(Option::is_some)
        .flat_map(Option::unwrap)
        .collect::<Vec<FeedEntry>>();
    entries.par_sort_unstable_by_key(|e| e.entry_date);
    entries.reverse();

    for item in entries {
        println!(
            "{}{delim}{}{delim}{}{delim}{}",
            item.feed_title.replace(&delim, "_"),
            item.entry_title.replace(&delim, "_"),
            item.entry_date,
            item.entry_url
        );
    }
}