1#![allow(clippy::missing_panics_doc)]
2
3use std::{fmt::Display, fs, time::SystemTime};
4
5use chrono::{DateTime, Utc};
6pub use error::Error;
7use futures::{stream, StreamExt};
8use itertools::Itertools;
9use lazy_static::lazy_static;
10use regex::Regex;
11use reqwest::Client;
12use std::fmt::Write as _;
13use tokio::runtime::Handle;
14
15pub use blog::{Blog, Post};
16mod blog;
17pub mod email;
18mod error;
19pub mod logger;
20pub mod xml;
21
22use crate::xml::parse_web_feed;
23
/// Upper bound on simultaneous in-flight feed downloads.
const CONCURRENT_REQUESTS: usize = 10;
25
26pub fn download_blogs(days: i64) -> Vec<Blog> {
28 let links = read_feeds();
29
30 let contents = if let Ok(handle) = Handle::try_current() {
31 std::thread::spawn(move || handle.block_on(get_blogs(links)))
32 .join()
33 .expect("Error spawning blog download")
34 } else {
35 let rt = tokio::runtime::Builder::new_current_thread()
36 .enable_all()
37 .build()
38 .expect("Could not build tokio runtime");
39
40 rt.block_on(get_blogs(links))
41 };
42
43 let contents: Vec<Blog> = contents
44 .into_iter()
45 .filter_map(|x| match x {
46 Some(x) => {
47 if !within_n_days(days, &x.most_recent_pub_date) {
48 return None;
49 }
50
51 let recent_posts: Vec<Post> = x
52 .posts
53 .into_iter()
54 .filter(|x| within_n_days(days, &x.pub_date))
55 .collect();
56
57 let non_empty = !recent_posts.is_empty();
58
59 non_empty.then_some(Blog {
60 posts: recent_posts,
61 ..x
62 })
63 }
64 None => None,
65 })
66 .collect();
67
68 contents
69}
70
71async fn get_blogs(links: Vec<String>) -> Vec<Option<Blog>> {
73 let client = Client::new();
74 stream::iter(links)
75 .map(|link| {
76 let client = &client;
77 async move {
78 let xml = get_page_async(link.as_str(), client)
79 .await
80 .map_err(|e| warn!("Error in {}\n{}", link, e))
81 .ok()?;
82
83 parse_web_feed(&xml)
84 .map_err(|e| warn!("Error in {}\n{}", link, e))
85 .ok()
86 }
87 })
88 .buffer_unordered(CONCURRENT_REQUESTS)
89 .collect::<Vec<Option<Blog>>>()
90 .await
91}
92
93pub fn read_feeds() -> Vec<String> {
98 let links = std::env::var("FEEDS")
99 .or_else(|_| fs::read_to_string("feeds.txt"))
100 .expect("Error in reading the feeds");
101
102 lazy_static! {
105 static ref RE: Regex = #[allow(clippy::unwrap_used)]
106 Regex::new(r"#.*$").unwrap();
107 }
108
109 links
110 .split(feeds_splitter)
111 .map(ToString::to_string)
112 .map(|l| RE.replace_all(&l, "").to_string())
113 .map(|l| l.trim().to_owned())
114 .filter(|l| !l.is_empty())
115 .unique()
116 .collect::<Vec<String>>()
117}
118
/// True for characters that separate feed entries: newline or `;`.
const fn feeds_splitter(c: char) -> bool {
  matches!(c, '\n' | ';')
}
126
127pub fn html_title() -> String {
129 format!("<h1>Rss2Email - {}</h1>", Utc::now().date_naive())
130}
131
132pub fn map_to_html(blogs: &Vec<Blog>) -> String {
134 let mut res = html_title();
135
136 for blog in blogs {
137 let mut tmp = format!("<h2>{}</h2><ul>", blog.title);
138 for post in &blog.posts {
139 let _ = write!(tmp, "<li><a href=\"{}\">{}</a></li>", post.link, post.title);
140
141 }
146 tmp.push_str("</ul>");
147 res.push_str(&tmp);
148 }
149
150 res
151}
152
153fn within_n_days(n: i64, date: &DateTime<Utc>) -> bool {
155 let today = Utc::now();
156 let date = date.with_timezone(&Utc);
157 (today - date).num_days() <= n
158}
159
/// Whether a response `Content-Type` (without parameters) is one of the
/// feed formats we can parse.
fn is_supported_content(content_type: &str) -> bool {
  matches!(
    content_type,
    "application/xml" | "text/xml" | "application/rss+xml" | "application/atom+xml" | "text/html"
  )
}
170
171pub async fn get_page_async(url: &str, client: &Client) -> Result<String, Error> {
173 let response = client
174 .get(url)
175 .header(
176 "Accept",
177 "application/xml, text/xml, application/rss+xml, application/atom+xml",
178 )
179 .header("User-Agent", "Rss2Email");
180 let response = response.send().await?;
181
182 let content_type = response
183 .headers()
184 .get(reqwest::header::CONTENT_TYPE)
185 .ok_or_else(|| Error::Generic("No content type header found on request.".to_string()))?
186 .to_str()
187 .map_err(|_e| Error::Generic("Content Type parsing error".to_string()))?
188 .split(';')
189 .collect::<Vec<&str>>()[0]
190 .to_owned();
191
192 if !is_supported_content(&content_type) {
193 return Err(Error::Generic(format!(
194 "Invalid content {} for {}",
195 content_type.as_str(),
196 url
197 )));
198 }
199
200 if !response.status().is_success() {
201 return Err(Error::Generic(response.text().await?));
202 }
203
204 response
205 .text()
206 .await
207 .map(|text| text.trim().to_string())
208 .map_err(|_e| Error::Generic("Body decode error".to_string()))
209}
210
211pub fn time_func<F, O>(f: F, fname: &str) -> O
221where
222 F: Fn() -> O,
223 O: Clone,
224{
225 let start = SystemTime::now();
226
227 let res = f();
228
229 let since_the_epoch = SystemTime::now()
230 .duration_since(start)
231 .expect("Time went backwards");
232
233 if cfg!(debug_assertions) {
234 info!(
235 "Elapsed time for {} was {:?}ms",
236 fname,
237 since_the_epoch.as_millis()
238 );
239 }
240
241 res
242}
243
244impl Display for Error {
245 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
246 match &self {
247 Self::Reqwest(e) => write!(f, "{e}"),
248 Self::HeaderString(e) => write!(f, "{e}"),
249 Self::Io(e) => write!(f, "{e}"),
250 Self::Generic(e) => write!(f, "{e}"),
251 }
252 }
253}
254
255impl From<std::io::Error> for Error {
256 fn from(error: std::io::Error) -> Self {
257 Self::Io(error)
258 }
259}
260
261impl From<reqwest::Error> for Error {
262 fn from(error: reqwest::Error) -> Self {
263 Self::Reqwest(Box::new(error))
264 }
265}
266
267impl From<http::header::ToStrError> for Error {
268 fn from(error: http::header::ToStrError) -> Self {
269 Self::HeaderString(Box::new(error))
270 }
271}