rss2email_lib/xml/
atom.rs1use chrono::{DateTime, Utc};
19use log::warn;
20use quick_xml::DeError;
21use serde_derive::{Deserialize, Serialize};
22
23use crate::blog::{Blog, Post};
24
25use super::{
26 limit_description,
27 traits::{BlogPost, WebFeed},
28 ParserError,
29};
30
31#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
32#[serde(rename_all = "camelCase")]
33#[serde(rename = "feed")]
34pub struct AtomFeed {
35 pub title: String,
36 #[serde(rename = "entry", default)]
37 pub entries: Vec<AtomPost>,
38}
39
40#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
41#[serde(rename_all = "camelCase")]
42#[serde(rename = "entry")]
43pub struct AtomPost {
44 pub title: String,
45 #[serde(rename = "link")]
46 pub links: Vec<Link>,
47 pub summary: Option<String>,
48 pub description: Option<String>,
49 pub published: Option<String>,
50 pub updated: String,
51}
52
53#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
54pub struct Link {
55 #[serde(rename = "@href")]
57 href: String,
58}
59
60impl WebFeed for Result<AtomFeed, DeError> {
61 fn into_blog(self) -> Result<Blog, ParserError> {
62 let feed = self?;
63 let title = feed.title;
64
65 let posts: Vec<Post> = feed
66 .entries
67 .iter()
68 .filter_map(|x| match x.clone().into_post() {
70 Ok(post) => Some(post),
71 Err(e) => {
72 warn!(
73 "\"{}\"'s post titled \"{}\" errored with '{}'",
74 title, x.title, e
75 );
76 None
77 }
78 })
79 .collect::<Vec<_>>();
80
81 if posts.is_empty() {
82 return Err(ParserError::Parse(format!("Empty feed: {title}")));
83 }
84
85 let last_build_date = posts
86 .iter()
87 .map(|x| x.pub_date)
88 .max()
89 .ok_or_else(|| ParserError::Parse("Date error.".to_owned()))?;
90
91 Ok(Blog {
92 title,
93 most_recent_pub_date: last_build_date,
94 posts,
95 })
96 }
97}
98
99impl BlogPost for AtomPost {
100 fn into_post(self) -> Result<Post, ParserError> {
101 let title = self.title;
102 let link = self.links[0].href.clone();
104 let description = self
105 .summary
106 .or(self.description)
107 .map(|desc| limit_description(&desc, 200));
108
109 let pub_date = self.published.unwrap_or(self.updated);
111
112 if pub_date.is_empty() {
113 return Err(ParserError::empty_date_error());
114 }
115
116 match DateTime::parse_from_rfc3339(&pub_date) {
117 Ok(last_build_date) => Ok(Post {
118 title,
119 link,
120 description,
121 pub_date: last_build_date.with_timezone(&Utc),
122 }),
123 Err(e) => Err(ParserError::generic_date_error(format!(
124 "Error parsing date '{pub_date}' ({e})"
125 ))),
126 }
127 }
128}