1use chrono::{DateTime, FixedOffset, Utc};
20use quick_xml::DeError;
21use regex::Regex;
22use serde_derive::{Deserialize, Serialize};
23
24use crate::{
25 blog::{Blog, Post},
26 warn,
27};
28
29use super::{
30 limit_description,
31 traits::{BlogPost, WebFeed},
32 ParserError,
33};
34
35#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
36#[serde(rename = "rss")]
37pub struct RssFeed {
38 pub channel: Channel,
39}
40
41#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
42#[serde(rename_all = "camelCase")]
43pub struct Channel {
44 pub title: String,
45 pub last_build_date: Option<String>,
46 pub pub_date: Option<String>,
47 #[serde(rename = "item", default)]
48 pub items: Vec<RssPost>,
49}
50
51#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
52#[serde(rename_all = "camelCase")]
53#[serde(rename = "item")]
54pub struct RssPost {
55 pub title: Option<String>,
58 pub link: Option<String>,
59 pub description: Option<String>,
60 pub pub_date: Option<String>,
61}
62
63impl WebFeed for Result<RssFeed, DeError> {
64 fn into_blog(self) -> Result<Blog, ParserError> {
65 let feed = self?;
66 let title = feed.channel.title;
67 let site_last_build_date = feed.channel.pub_date;
68 let items = feed.channel.items;
69 let last_post_build_date = items.first().and_then(|x| x.clone().pub_date);
70
71 let last_build_date = site_last_build_date
72 .or(last_post_build_date)
73 .ok_or_else(|| ParserError::Parse("Date not found.".to_owned()))?;
74
75 let posts: Vec<Post> = items
76 .iter()
77 .filter_map(|x| match x.clone().into_post() {
79 Ok(post) => Some(post),
80 Err(e) => {
81 warn!(
82 "\"{}\"'s post titled \"{}\" errored with '{}'",
83 title,
84 x.title
85 .as_ref()
86 .map_or_else(|| "n/a".to_string(), Clone::clone),
87 e
88 );
89 None
90 }
91 })
92 .collect();
93
94 let last_build_date = parse_date_helper(&last_build_date)?;
95
96 Ok(Blog {
97 title,
98 most_recent_pub_date: last_build_date.with_timezone(&Utc),
99 posts,
100 })
101 }
102}
103
104impl BlogPost for RssPost {
105 fn into_post(self) -> Result<Post, ParserError> {
106 let Some(link) = self.link else {
107 return Err(ParserError::Parse("No link in post".to_string()));
108 };
109
110 let (title, description) = match (
111 self.title,
112 self.description.map(|desc| limit_description(&desc, 200)),
113 ) {
114 (Some(link), description) => (link, description),
115 (None, None) => (link.clone(), None),
116 (None, Some(description)) => {
117 if description.len() > 50 {
118 (limit_description(&description, 50), Some(description))
119 } else {
120 (description, None)
121 }
122 }
123 };
124
125 let pub_date = self
126 .pub_date
127 .ok_or_else(|| ParserError::Parse("Date not found.".to_owned()))?;
128
129 let last_build_date = parse_date_helper(&pub_date)?;
130
131 Ok(Post {
132 title,
133 link,
134 description,
135 pub_date: last_build_date.with_timezone(&Utc),
136 })
137 }
138}
139
140fn parse_date_helper(date: &str) -> Result<DateTime<FixedOffset>, ParserError> {
143 if date.is_empty() {
144 return Err(ParserError::empty_date_error());
145 }
146
147 DateTime::parse_from_rfc2822(date).or_else(|_| parse_from_rfc822(date))
148}
149
150fn parse_from_rfc822(date: &str) -> Result<DateTime<FixedOffset>, ParserError> {
157 let format_str = "%d %b %y %H:%M";
158
159 let regex = Regex::new(r"\d\s?([a-zA-Z]{2,3}$)").expect("Invalid regex");
164
165 let cap = regex
166 .captures(date)
167 .and_then(|x| x.get(1))
168 .ok_or_else(|| ParserError::timezone_date_error("Timezone not found".to_owned()))?
169 .as_str();
170
171 let date = regex.replace_all(date, "").to_string();
172
173 let tz = tz_to_offset(cap)?;
174
175 DateTime::parse_from_str(&date, format_str)
176 .map(|dt| dt.with_timezone(&tz))
177 .map_err(|e| ParserError::generic_date_error(format!("Error parsing date '{date}' ({e})")))
178}
179
180fn tz_to_offset(tz: &str) -> Result<FixedOffset, ParserError> {
182 match tz {
183 "UTC" => Ok(FixedOffset::east_opt(0).expect("FixedOffset::east out of bounds")),
184 _ => Err(ParserError::timezone_date_error(format!(
185 "Unknown timezone {tz}, please open an issue!"
186 ))),
187 }
188}