1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
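//! Atom feed parsing. See the [specification](https://www.rfc-editor.org/rfc/rfc4287).
//!
//! Expected document shape (elements followed by `?` are optional):
//!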
//! ```xml
//! <feed>
//!   <title></title>
//!   <updated>ISO.8601</updated>
//!   <entry>
//!     <title></title>
//!     <link href=""/>
//!     <updated>ISO.8601</updated>
//!     <published>ISO.8601</published>?
//!     <summary></summary>?
//!     <description></description>?
//!   </entry>
//! </feed>
//! ```

use chrono::{DateTime, Utc};
use log::warn;
use quick_xml::DeError;
use serde_derive::{Deserialize, Serialize};

use crate::blog::{Blog, Post};

use super::{
  limit_description,
  traits::{BlogPost, WebFeed},
  ParserError,
};

/// Root `<feed>` element of an Atom document.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename = "feed", rename_all = "camelCase")]
pub struct AtomFeed {
  /// Text of the feed's `<title>` element.
  pub title: String,
  /// All `<entry>` children; falls back to an empty list when none are present.
  #[serde(default, rename = "entry")]
  pub entries: Vec<AtomPost>,
}

/// A single `<entry>` element of an Atom feed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename = "entry", rename_all = "camelCase")]
pub struct AtomPost {
  /// Text of the entry's `<title>` element.
  pub title: String,
  /// Every `<link>` child of the entry, in document order.
  #[serde(rename = "link")]
  pub links: Vec<Link>,
  /// Optional `<summary>` text.
  pub summary: Option<String>,
  /// Optional `<description>` text.
  pub description: Option<String>,
  /// Optional `<published>` timestamp, kept as the raw string.
  pub published: Option<String>,
  /// `<updated>` timestamp, kept as the raw string.
  pub updated: String,
}

/// A `<link href=""/>` element inside an entry.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Link {
  // The `@` prefix addresses the `href` XML attribute rather than a child
  // element. See https://github.com/tafia/quick-xml/issues/534
  #[serde(rename = "@href")]
  href: String,
}

impl WebFeed for Result<AtomFeed, DeError> {
  /// Converts a deserialized Atom feed into a [`Blog`].
  ///
  /// Entries that cannot be converted into a [`Post`] are logged with `warn!`
  /// and skipped, so a single malformed entry does not fail the whole feed.
  /// The blog's `most_recent_pub_date` is the newest `pub_date` among the
  /// posts that were converted successfully.
  ///
  /// # Errors
  ///
  /// - Propagates the deserialization error (via `?`) when `self` is `Err`.
  /// - Returns `ParserError::Parse` ("Empty feed: …") when no entry could be
  ///   converted into a post.
  fn into_blog(self) -> Result<Blog, ParserError> {
    let feed = self?;
    let title = feed.title;

    let posts: Vec<Post> = feed
      .entries
      .into_iter()
      .filter_map(|entry| {
        // `into_post` consumes the entry, so keep just its title around for
        // the warning instead of cloning the whole entry.
        let entry_title = entry.title.clone();
        match entry.into_post() {
          Ok(post) => Some(post),
          Err(e) => {
            warn!(
              "\"{}\"'s post titled \"{}\" errored with '{}'",
              title, entry_title, e
            );
            None
          }
        }
      })
      .collect();

    // `max()` is `None` exactly when no post survived conversion, so the
    // empty-feed check and the date lookup are one and the same.
    let most_recent_pub_date = posts
      .iter()
      .map(|post| post.pub_date)
      .max()
      .ok_or_else(|| ParserError::Parse(format!("Empty feed: {title}")))?;

    Ok(Blog {
      title,
      most_recent_pub_date,
      posts,
    })
  }
}

impl BlogPost for AtomPost {
  /// Converts an Atom entry into a [`Post`].
  ///
  /// - The post link is the `href` of the entry's first `<link>`.
  /// - The description is `summary` if present, otherwise `description`,
  ///   passed through `limit_description` with a limit of 200.
  /// - The publication date is `published` if present, otherwise `updated`,
  ///   parsed as RFC 3339 and converted to UTC.
  ///
  /// # Errors
  ///
  /// - `ParserError::Parse` when the entry has no links.
  /// - `ParserError::empty_date_error()` when the chosen date string is empty.
  /// - `ParserError::generic_date_error(..)` when the date is not valid
  ///   RFC 3339.
  fn into_post(self) -> Result<Post, ParserError> {
    let title = self.title;

    // Use the first link for now. An entry without any link is reported as an
    // error instead of panicking on an out-of-bounds index.
    let link = self
      .links
      .into_iter()
      .next()
      .map(|first| first.href)
      .ok_or_else(|| ParserError::Parse(format!("Post \"{title}\" has no link")))?;

    let description = self
      .summary
      .or(self.description)
      .map(|desc| limit_description(&desc, 200));

    // Use publish date if exists otherwise fallback to updated
    let pub_date = self.published.unwrap_or(self.updated);

    if pub_date.is_empty() {
      return Err(ParserError::empty_date_error());
    }

    let parsed = DateTime::parse_from_rfc3339(&pub_date).map_err(|e| {
      ParserError::generic_date_error(format!(
        "Error parsing date '{pub_date}' ({e})"
      ))
    })?;

    Ok(Post {
      title,
      link,
      description,
      pub_date: parsed.with_timezone(&Utc),
    })
  }
}