feed_parser/parsers/atom/
mod.rs

1use crate::parsers::Feed;
2use core::str;
3use quick_xml::de::from_str;
4use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
5use quick_xml::Reader;
6use quick_xml::Writer;
7use std::io::Cursor;
8
9#[cfg(test)]
10mod tests;
11
12pub fn parse(text: &str) -> Result<Vec<Feed>, String> {
13    let mut reader = Reader::from_str(text);
14    reader.config_mut().trim_text(true);
15
16    let mut feeds = Vec::new();
17    let mut writer = Writer::new(Cursor::new(Vec::new()));
18    let mut parsing = false;
19    loop {
20        match reader.read_event() {
21            Ok(Event::Start(e)) => {
22                if parsing {
23                    if e.name().as_ref() == b"dc:creator" {
24                        assert!(writer
25                            .write_event(Event::Start(BytesStart::new("creator")))
26                            .is_ok());
27                    } else if e.name().as_ref() == b"dc:date" {
28                        assert!(writer
29                            .write_event(Event::Start(BytesStart::new("date")))
30                            .is_ok());
31                    } else if e.name().as_ref() == b"pubDate" || e.name().as_ref() == b"published" {
32                        assert!(writer
33                            .write_event(Event::Start(BytesStart::new("publish_date")))
34                            .is_ok());
35                    } else if e.name().as_ref() == b"link" {
36                        continue;
37                    } else {
38                        assert!(writer.write_event(Event::Start(e.clone())).is_ok());
39                    }
40                }
41                if e.name().as_ref() == b"entry" {
42                    assert!(writer
43                        .write_event(Event::Start(BytesStart::new("entry")))
44                        .is_ok());
45                    parsing = true;
46                }
47            }
48            Ok(Event::Empty(e)) => {
49                if parsing {
50                    if e.name().as_ref() == b"link" {
51                        let mut is_link = true;
52                        for attr in e.attributes() {
53                            let attr = attr.unwrap();
54                            if attr.key.0 == b"type" {
55                                let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
56                                if attr_text != "text/html" {
57                                    is_link = false;
58                                }
59                            } else if attr.key.0 == b"rel" {
60                                let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
61                                if attr_text != "alternate" {
62                                    is_link = false;
63                                }
64                            }
65                        }
66                        if is_link == false {
67                            continue;
68                        }
69                        for attr in e.attributes() {
70                            let attr = attr.unwrap();
71                            if attr.key.0 == b"href" {
72                                assert!(writer
73                                    .write_event(Event::Start(BytesStart::new("link")))
74                                    .is_ok());
75                                let attr_text: &str = str::from_utf8(attr.value.as_ref()).unwrap();
76                                assert!(writer
77                                    .write_event(Event::Text(BytesText::new(attr_text)))
78                                    .is_ok());
79                                assert!(writer
80                                    .write_event(Event::End(BytesEnd::new("link")))
81                                    .is_ok());
82                            }
83                        }
84                    } else {
85                        assert!(writer.write_event(Event::Empty(e)).is_ok());
86                    }
87                }
88            }
89            Ok(Event::End(e)) => {
90                if e.name().as_ref() == b"entry" {
91                    assert!(writer
92                        .write_event(Event::End(BytesEnd::new("entry")))
93                        .is_ok());
94                    let feed_text = writer.into_inner().into_inner();
95                    let feed = from_str::<Feed>(str::from_utf8(&feed_text).unwrap()).unwrap();
96                    feeds.push(feed);
97
98                    writer = Writer::new(Cursor::new(Vec::new()));
99                    parsing = false;
100                }
101                if parsing {
102                    if e.name().as_ref() == b"dc:creator" {
103                        assert!(writer
104                            .write_event(Event::End(BytesEnd::new("creator")))
105                            .is_ok());
106                    } else if e.name().as_ref() == b"dc:date" {
107                        assert!(writer
108                            .write_event(Event::End(BytesEnd::new("date")))
109                            .is_ok());
110                    } else if e.name().as_ref() == b"pubDate" || e.name().as_ref() == b"published" {
111                        assert!(writer
112                            .write_event(Event::End(BytesEnd::new("publish_date")))
113                            .is_ok());
114                    } else if e.name().as_ref() == b"link" {
115                        continue;
116                    } else {
117                        assert!(writer.write_event(Event::End(e)).is_ok());
118                    }
119                }
120            }
121            Ok(Event::Text(e)) => {
122                if parsing {
123                    assert!(writer.write_event(Event::Text(e)).is_ok());
124                }
125            }
126            Ok(Event::CData(e)) => {
127                if parsing {
128                    assert!(writer.write_event(Event::CData(e)).is_ok());
129                }
130            }
131            Ok(Event::Eof) => break,
132            Ok(_e) => {}
133            Err(e) => {
134                return Err(format!(
135                    "Error at position {}: {:?}",
136                    reader.error_position(),
137                    e
138                ))
139            }
140        }
141    }
142    return Ok(feeds);
143}