rss2email_lib/xml/
atom.rs

1//! [Specification](https://www.rfc-editor.org/rfc/rfc4287)
2//!
3//! ```xml
4//! <feed>
5//!   <title></title>
6//!   <updated>ISO.8601</updated>
7//!   <entry>
8//!     <title></title>
9//!     <link href=""/>
10//!     <updated>ISO.8601</updated>
11//!     <published>ISO.8601</published>?
12//!     <summary></summary>?
13//!     <description></description>?
14//!   </entry>
15//! </feed>
16//! ```
17
18use chrono::{DateTime, Utc};
19use log::warn;
20use quick_xml::DeError;
21use serde_derive::{Deserialize, Serialize};
22
23use crate::blog::{Blog, Post};
24
25use super::{
26  limit_description,
27  traits::{BlogPost, WebFeed},
28  ParserError,
29};
30
31#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
32#[serde(rename_all = "camelCase")]
33#[serde(rename = "feed")]
34pub struct AtomFeed {
35  pub title: String,
36  #[serde(rename = "entry", default)]
37  pub entries: Vec<AtomPost>,
38}
39
40#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
41#[serde(rename_all = "camelCase")]
42#[serde(rename = "entry")]
43pub struct AtomPost {
44  pub title: String,
45  #[serde(rename = "link")]
46  pub links: Vec<Link>,
47  pub summary: Option<String>,
48  pub description: Option<String>,
49  pub published: Option<String>,
50  pub updated: String,
51}
52
53#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
54pub struct Link {
55  // See https://github.com/tafia/quick-xml/issues/534
56  #[serde(rename = "@href")]
57  href: String,
58}
59
60impl WebFeed for Result<AtomFeed, DeError> {
61  fn into_blog(self) -> Result<Blog, ParserError> {
62    let feed = self?;
63    let title = feed.title;
64
65    let posts: Vec<Post> = feed
66      .entries
67      .iter()
68      // TODO Turn this into a method
69      .filter_map(|x| match x.clone().into_post() {
70        Ok(post) => Some(post),
71        Err(e) => {
72          warn!(
73            "\"{}\"'s post titled \"{}\" errored with '{}'",
74            title, x.title, e
75          );
76          None
77        }
78      })
79      .collect::<Vec<_>>();
80
81    if posts.is_empty() {
82      return Err(ParserError::Parse(format!("Empty feed: {title}")));
83    }
84
85    let last_build_date = posts
86      .iter()
87      .map(|x| x.pub_date)
88      .max()
89      .ok_or_else(|| ParserError::Parse("Date error.".to_owned()))?;
90
91    Ok(Blog {
92      title,
93      most_recent_pub_date: last_build_date,
94      posts,
95    })
96  }
97}
98
99impl BlogPost for AtomPost {
100  fn into_post(self) -> Result<Post, ParserError> {
101    let title = self.title;
102    // Use the first link for now
103    let link = self.links[0].href.clone();
104    let description = self
105      .summary
106      .or(self.description)
107      .map(|desc| limit_description(&desc, 200));
108
109    // Use publish date if exists otherwise fallback to updated
110    let pub_date = self.published.unwrap_or(self.updated);
111
112    if pub_date.is_empty() {
113      return Err(ParserError::empty_date_error());
114    }
115
116    match DateTime::parse_from_rfc3339(&pub_date) {
117      Ok(last_build_date) => Ok(Post {
118        title,
119        link,
120        description,
121        pub_date: last_build_date.with_timezone(&Utc),
122      }),
123      Err(e) => Err(ParserError::generic_date_error(format!(
124        "Error parsing date '{pub_date}' ({e})"
125      ))),
126    }
127  }
128}