rss2email_lib/
lib.rs

#![allow(clippy::missing_panics_doc)]

use std::{fmt::Display, fs, time::SystemTime};

use chrono::{DateTime, Utc};
pub use error::Error;
use futures::{stream, StreamExt};
use itertools::Itertools;
use lazy_static::lazy_static;
use regex::Regex;
use reqwest::Client;
use std::fmt::Write as _;
use tokio::runtime::Handle;

pub use blog::{Blog, Post};
mod blog;
pub mod email;
mod error;
pub mod logger;
pub mod xml;

use crate::xml::parse_web_feed;

/// Maximum number of feeds fetched concurrently by `get_blogs`.
const CONCURRENT_REQUESTS: usize = 10;

/// Downloads all the web feeds specified in `feeds.txt` and converts them to `Blog`s.
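///
/// Works both when called from inside an existing tokio runtime and when no
/// runtime is running (a single-threaded runtime is built on the fly).
///
/// A minimal usage sketch, marked `no_run` because it needs a `feeds.txt` file
/// (or a `FEEDS` environment variable) and network access:
///
/// ```no_run
/// use rss2email_lib::{download_blogs, Blog};
///
/// // Keep only blogs with a post published within the last 7 days.
/// let blogs: Vec<Blog> = download_blogs(7);
/// ```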
pub fn download_blogs(days: i64) -> Vec<Blog> {
  let links = read_feeds();

  let contents = if let Ok(handle) = Handle::try_current() {
    std::thread::spawn(move || handle.block_on(get_blogs(links)))
      .join()
      .expect("Error spawning blog download")
  } else {
    let rt = tokio::runtime::Builder::new_current_thread()
      .enable_all()
      .build()
      .expect("Could not build tokio runtime");

    rt.block_on(get_blogs(links))
  };

  let contents: Vec<Blog> = contents
    .into_iter()
    .filter_map(|x| match x {
      Some(x) => {
        if !within_n_days(days, &x.most_recent_pub_date) {
          return None;
        }

        let recent_posts: Vec<Post> = x
          .posts
          .into_iter()
          .filter(|x| within_n_days(days, &x.pub_date))
          .collect();

        let non_empty = !recent_posts.is_empty();

        non_empty.then_some(Blog {
          posts: recent_posts,
          ..x
        })
      }
      None => None,
    })
    .collect();

  contents
}

/// Helper method for [download_blogs](download_blogs).
async fn get_blogs(links: Vec<String>) -> Vec<Option<Blog>> {
  let client = Client::new();
  stream::iter(links)
    .map(|link| {
      let client = &client;
      async move {
        let xml = get_page_async(link.as_str(), client)
          .await
          .map_err(|e| warn!("Error in {}\n{}", link, e))
          .ok()?;

        parse_web_feed(&xml)
          .map_err(|e| warn!("Error in {}\n{}", link, e))
          .ok()
      }
    })
    .buffer_unordered(CONCURRENT_REQUESTS)
    .collect::<Vec<Option<Blog>>>()
    .await
}

/// Parses links from `feeds.txt` or the `FEEDS` environment variable.
///
/// Assumes one link per line (or one link per `;`-separated entry when using
/// the environment variable). Any text between a `#` and a line end is
/// considered a comment.
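///
/// Example `feeds.txt` contents (hypothetical URLs, shown only as a sketch of
/// the expected format):
///
/// ```text
/// https://example.com/a/feed.xml
/// https://example.com/b/rss.xml # a trailing comment
/// # a whole-line comment
/// ```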
pub fn read_feeds() -> Vec<String> {
  let links = std::env::var("FEEDS")
    .or_else(|_| fs::read_to_string("feeds.txt"))
    .expect("Error in reading the feeds");

  // Compile the regex only once instead of on every call, see
  // https://docs.rs/regex/latest/regex/#example-avoid-compiling-the-same-regex-in-a-loop
  lazy_static! {
    static ref RE: Regex = #[allow(clippy::unwrap_used)]
    Regex::new(r"#.*$").unwrap();
  }

  links
    .split(feeds_splitter)
    .map(ToString::to_string)
    .map(|l| RE.replace_all(&l, "").to_string())
    .map(|l| l.trim().to_owned())
    .filter(|l| !l.is_empty())
    .unique()
    .collect::<Vec<String>>()
}

/// Splits the feeds on either
///
/// - `\n` for input coming from `feeds.txt`
/// - `;` for input coming from the `FEEDS` environment variable
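///
/// For example (hypothetical URLs), setting
/// `FEEDS="https://a.example/feed.xml;https://b.example/feed.xml"` is
/// equivalent to a `feeds.txt` containing those two URLs on separate lines.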
const fn feeds_splitter(c: char) -> bool {
  c == '\n' || c == ';'
}

/// A basic `<h1>` element with the current date.
pub fn html_title() -> String {
  format!("<h1>Rss2Email - {}</h1>", Utc::now().date_naive())
}

/// Generates the HTML contents corresponding to the given `Blog` collection.
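///
/// The output has roughly this shape (one `<h2>` plus a list per blog; the
/// date and titles below are placeholders):
///
/// ```text
/// <h1>Rss2Email - 2024-01-01</h1>
/// <h2>Some blog</h2><ul><li><a href="https://example.com/post">Some post</a></li></ul>
/// ```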
pub fn map_to_html(blogs: &Vec<Blog>) -> String {
  let mut res = html_title();

  for blog in blogs {
    let mut tmp = format!("<h2>{}</h2><ul>", blog.title);
    for post in &blog.posts {
      let _ = write!(tmp, "<li><a href=\"{}\">{}</a></li>", post.link, post.title);

      // Removed for now, see https://github.com/AntoniosBarotsis/Rss2Email/issues/38
      // if let Some(desc) = &post.description {
      //   tmp.push_str(&format!("<p>{}</p>", desc));
      // }
    }
    tmp.push_str("</ul>");
    res.push_str(&tmp);
  }

  res
}

/// Returns true if the passed date is within `n` days from the current date.
fn within_n_days(n: i64, date: &DateTime<Utc>) -> bool {
  let today = Utc::now();
  let date = date.with_timezone(&Utc);
  (today - date).num_days() <= n
}

/// Returns true if the given MIME type is one of the feed content types we can handle.
fn is_supported_content(content_type: &str) -> bool {
  let supported = [
    "application/xml",
    "text/xml",
    "application/rss+xml",
    "application/atom+xml",
    "text/html",
  ];
  supported.contains(&content_type)
}

/// Helper function for downloading the contents of a web page.
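///
/// A usage sketch (marked `no_run` since it performs a network request to a
/// hypothetical URL):
///
/// ```no_run
/// # async fn example() -> Result<(), rss2email_lib::Error> {
/// let client = reqwest::Client::new();
/// let body = rss2email_lib::get_page_async("https://example.com/feed.xml", &client).await?;
/// # Ok(())
/// # }
/// ```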
pub async fn get_page_async(url: &str, client: &Client) -> Result<String, Error> {
  let response = client
    .get(url)
    .header(
      "Accept",
      "application/xml, text/xml, application/rss+xml, application/atom+xml",
    )
    .header("User-Agent", "Rss2Email");
  let response = response.send().await?;

  let content_type = response
    .headers()
    .get(reqwest::header::CONTENT_TYPE)
    .ok_or_else(|| Error::Generic("No content type header found on the response.".to_string()))?
    .to_str()
    .map_err(|_e| Error::Generic("Content Type parsing error".to_string()))?
    .split(';')
    .collect::<Vec<&str>>()[0]
    .to_owned();

  if !is_supported_content(&content_type) {
    return Err(Error::Generic(format!(
      "Invalid content {} for {}",
      content_type.as_str(),
      url
    )));
  }

  if !response.status().is_success() {
    return Err(Error::Generic(response.text().await?));
  }

  response
    .text()
    .await
    .map(|text| text.trim().to_string())
    .map_err(|_e| Error::Generic("Body decode error".to_string()))
}

/// Helper function that times and prints the elapsed execution time
/// of `F` if run in debug mode.
///
/// # Usage
///
/// ```
/// use rss2email_lib::*;
/// let blogs: Vec<Blog> = time_func(|| download_blogs(7), "download_blogs");
/// ```
pub fn time_func<F, O>(f: F, fname: &str) -> O
where
  F: Fn() -> O,
  O: Clone,
{
  let start = SystemTime::now();

  let res = f();

  let elapsed = SystemTime::now()
    .duration_since(start)
    .expect("Time went backwards");

  if cfg!(debug_assertions) {
    info!(
      "Elapsed time for {} was {:?}ms",
      fname,
      elapsed.as_millis()
    );
  }

  res
}

impl Display for Error {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match &self {
      Self::Reqwest(e) => write!(f, "{e}"),
      Self::HeaderString(e) => write!(f, "{e}"),
      Self::Io(e) => write!(f, "{e}"),
      Self::Generic(e) => write!(f, "{e}"),
    }
  }
}

impl From<std::io::Error> for Error {
  fn from(error: std::io::Error) -> Self {
    Self::Io(error)
  }
}

impl From<reqwest::Error> for Error {
  fn from(error: reqwest::Error) -> Self {
    Self::Reqwest(Box::new(error))
  }
}

impl From<http::header::ToStrError> for Error {
  fn from(error: http::header::ToStrError) -> Self {
    Self::HeaderString(Box::new(error))
  }
}
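
// A minimal sketch of unit tests for the private helpers above, added here as
// a suggestion; it assumes no `tests` module already exists for this file.
#[cfg(test)]
mod tests {
  use super::*;
  use chrono::Duration;

  #[test]
  fn within_n_days_accepts_recent_dates_only() {
    let two_days_ago = Utc::now() - Duration::days(2);
    assert!(within_n_days(7, &two_days_ago));
    assert!(!within_n_days(1, &two_days_ago));
  }

  #[test]
  fn is_supported_content_checks_feed_mime_types() {
    assert!(is_supported_content("application/rss+xml"));
    assert!(is_supported_content("text/html"));
    assert!(!is_supported_content("application/json"));
  }

  #[test]
  fn feeds_splitter_matches_newline_and_semicolon() {
    assert!(feeds_splitter('\n'));
    assert!(feeds_splitter(';'));
    assert!(!feeds_splitter(','));
  }
}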