feed_parser/parsers/atom/
mod.rs1use crate::parsers::Feed;
2use core::str;
3use quick_xml::de::from_str;
4use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
5use quick_xml::Reader;
6use quick_xml::Writer;
7use std::io::Cursor;
8
9#[cfg(test)]
10mod tests;
11
12pub fn parse(text: &str) -> Result<Vec<Feed>, String> {
13 let mut reader = Reader::from_str(text);
14 reader.config_mut().trim_text(true);
15
16 let mut feeds = Vec::new();
17 let mut writer = Writer::new(Cursor::new(Vec::new()));
18 let mut parsing = false;
19 loop {
20 match reader.read_event() {
21 Ok(Event::Start(e)) => {
22 if parsing {
23 if e.name().as_ref() == b"dc:creator" {
24 assert!(writer
25 .write_event(Event::Start(BytesStart::new("creator")))
26 .is_ok());
27 } else if e.name().as_ref() == b"dc:date" {
28 assert!(writer
29 .write_event(Event::Start(BytesStart::new("date")))
30 .is_ok());
31 } else if e.name().as_ref() == b"pubDate" || e.name().as_ref() == b"published" {
32 assert!(writer
33 .write_event(Event::Start(BytesStart::new("publish_date")))
34 .is_ok());
35 } else if e.name().as_ref() == b"link" {
36 continue;
37 } else {
38 assert!(writer.write_event(Event::Start(e.clone())).is_ok());
39 }
40 }
41 if e.name().as_ref() == b"entry" {
42 assert!(writer
43 .write_event(Event::Start(BytesStart::new("entry")))
44 .is_ok());
45 parsing = true;
46 }
47 }
48 Ok(Event::Empty(e)) => {
49 if parsing {
50 if e.name().as_ref() == b"link" {
51 let mut is_link = true;
52 for attr in e.attributes() {
53 let attr = attr.unwrap();
54 if attr.key.0 == b"type" {
55 let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
56 if attr_text != "text/html" {
57 is_link = false;
58 }
59 } else if attr.key.0 == b"rel" {
60 let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
61 if attr_text != "alternate" {
62 is_link = false;
63 }
64 }
65 }
66 if is_link == false {
67 continue;
68 }
69 for attr in e.attributes() {
70 let attr = attr.unwrap();
71 if attr.key.0 == b"href" {
72 assert!(writer
73 .write_event(Event::Start(BytesStart::new("link")))
74 .is_ok());
75 let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
76 assert!(writer
77 .write_event(Event::Text(BytesText::new(attr_text)))
78 .is_ok());
79 assert!(writer
80 .write_event(Event::End(BytesEnd::new("link")))
81 .is_ok());
82 }
83 }
84 } else {
85 assert!(writer.write_event(Event::Empty(e)).is_ok());
86 }
87 }
88 }
89 Ok(Event::End(e)) => {
90 if e.name().as_ref() == b"entry" {
91 assert!(writer
92 .write_event(Event::End(BytesEnd::new("entry")))
93 .is_ok());
94 let feed_text = writer.into_inner().into_inner();
95 let feed = from_str::<Feed>(str::from_utf8(&feed_text).unwrap()).unwrap();
96 feeds.push(feed);
97
98 writer = Writer::new(Cursor::new(Vec::new()));
99 parsing = false;
100 }
101 if parsing {
102 if e.name().as_ref() == b"dc:creator" {
103 assert!(writer
104 .write_event(Event::End(BytesEnd::new("creator")))
105 .is_ok());
106 } else if e.name().as_ref() == b"dc:date" {
107 assert!(writer
108 .write_event(Event::End(BytesEnd::new("date")))
109 .is_ok());
110 } else if e.name().as_ref() == b"pubDate" || e.name().as_ref() == b"published" {
111 assert!(writer
112 .write_event(Event::End(BytesEnd::new("publish_date")))
113 .is_ok());
114 } else if e.name().as_ref() == b"link" {
115 continue;
116 } else {
117 assert!(writer.write_event(Event::End(e)).is_ok());
118 }
119 }
120 }
121 Ok(Event::Text(e)) => {
122 if parsing {
123 assert!(writer.write_event(Event::Text(e)).is_ok());
124 }
125 }
126 Ok(Event::CData(e)) => {
127 if parsing {
128 assert!(writer.write_event(Event::CData(e)).is_ok());
129 }
130 }
131 Ok(Event::Eof) => break,
132 Ok(_e) => {}
133 Err(e) => {
134 return Err(format!(
135 "Error at position {}: {:?}",
136 reader.error_position(),
137 e
138 ))
139 }
140 }
141 }
142 return Ok(feeds);
143}