1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
use chrono::{DateTime, Utc};
use log::warn;
use quick_xml::DeError;
use serde_derive::{Deserialize, Serialize};
use crate::blog::{Blog, Post};
use super::{
limit_description,
traits::{BlogPost, WebFeed},
ParserError,
};
/// Top-level Atom document, (de)serialized under the element name `feed`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename = "feed", rename_all = "camelCase")]
pub struct AtomFeed {
    /// Title of the feed.
    pub title: String,
    /// Entries of the feed, read from the repeated `entry` elements.
    ///
    /// Because of `default`, a feed without any `entry` deserializes to an
    /// empty vector instead of failing.
    #[serde(rename = "entry", default)]
    pub entries: Vec<AtomPost>,
}
/// A single Atom entry, (de)serialized under the element name `entry`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename = "entry", rename_all = "camelCase")]
pub struct AtomPost {
    /// Title of the entry.
    pub title: String,
    /// Links of the entry, read from the repeated `link` elements.
    #[serde(rename = "link")]
    pub links: Vec<Link>,
    /// Optional summary text; preferred over `description` when converting to a post.
    pub summary: Option<String>,
    /// Optional description text; used when `summary` is absent.
    pub description: Option<String>,
    /// Optional publication timestamp, kept as the raw string from the feed.
    pub published: Option<String>,
    /// Last-updated timestamp (required), kept as the raw string from the feed.
    pub updated: String,
}
/// A `link` element of an Atom entry.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Link {
    /// Target of the link, mapped to the serde name `@href`
    /// (presumably quick-xml's convention for the `href` XML attribute).
    #[serde(rename = "@href")]
    href: String,
}
impl WebFeed for Result<AtomFeed, DeError> {
fn into_blog(self) -> Result<Blog, ParserError> {
let feed = self?;
let title = feed.title;
let posts: Vec<Post> = feed
.entries
.iter()
.filter_map(|x| match x.clone().into_post() {
Ok(post) => Some(post),
Err(e) => {
warn!(
"\"{}\"'s post titled \"{}\" errored with '{}'",
title, x.title, e
);
None
}
})
.collect::<Vec<_>>();
if posts.is_empty() {
return Err(ParserError::Parse(format!("Empty feed: {title}")));
}
let last_build_date = posts
.iter()
.map(|x| x.pub_date)
.max()
.ok_or_else(|| ParserError::Parse("Date error.".to_owned()))?;
Ok(Blog {
title,
most_recent_pub_date: last_build_date,
posts,
})
}
}
impl BlogPost for AtomPost {
    /// Converts this Atom entry into a [`Post`].
    ///
    /// * The post link is the `href` of the entry's first link.
    /// * The description is `summary` if present, otherwise `description`,
    ///   passed through `limit_description` with an argument of `200`.
    /// * The publication date is `published` if present, otherwise `updated`,
    ///   parsed as RFC 3339 and converted to UTC.
    ///
    /// # Errors
    ///
    /// * `ParserError::Parse` when the entry has no links.
    /// * `ParserError::empty_date_error()` when the selected date string is empty.
    /// * `ParserError::generic_date_error(..)` when the date is not valid RFC 3339.
    fn into_post(self) -> Result<Post, ParserError> {
        let title = self.title;

        // `links` is a public field, so an entry can be built with an empty
        // vector; return an error instead of panicking on `links[0]`.
        // Taking the first element by value also avoids cloning the href.
        let link = self
            .links
            .into_iter()
            .next()
            .map(|first| first.href)
            .ok_or_else(|| ParserError::Parse(format!("No link found for post: {title}")))?;

        let description = self
            .summary
            .or(self.description)
            .map(|desc| limit_description(&desc, 200));

        let pub_date = self.published.unwrap_or(self.updated);
        if pub_date.is_empty() {
            return Err(ParserError::empty_date_error());
        }

        let parsed = DateTime::parse_from_rfc3339(&pub_date).map_err(|e| {
            ParserError::generic_date_error(format!(
                "Error parsing date '{}' ({})",
                pub_date, e
            ))
        })?;

        Ok(Post {
            title,
            link,
            description,
            pub_date: parsed.with_timezone(&Utc),
        })
    }
}