readfeed/lib.rs
1//! `ReadFeed` is a library to process feeds. It provides pull parsers for common feed
2//! formats such as [RSS][rss] and [Atom][atom].
3//!
4//! ## Examples
5//!
6//! ### RSS
7//!
8//! ```rust
9//! use readfeed::rss::{self, ChannelElem, Elem, ItemElem, RssElem};
10//!
11//! let input = "
12//! <rss>
13//! <channel>
14//! <title>Channel Title</title>
15//! <item>
16//! <title>Item Title 1</title>
17//! <link>https://example.com/1</link>
18//! <description>Item Description 1</description>
19//! </item>
20//! </channel>
21//! </rss>
22//! ";
23//!
24//! let mut iter = rss::Iter::new(input);
25//!
26//! let Some(Elem::Rss(mut rss_iter)) = iter.next() else {
27//! panic!();
28//! };
29//!
30//! let Some(RssElem::Channel(mut channel_iter)) = rss_iter.next() else {
31//! panic!();
32//! };
33//!
34//! if let Some(ChannelElem::Title(title)) = channel_iter.next() {
35//! assert_eq!("Channel Title", title.content());
36//! } else {
37//! panic!();
38//! }
39//!
40//! let Some(ChannelElem::Item(mut item_iter)) = channel_iter.next() else {
41//! panic!();
42//! };
43//!
44//! if let Some(ItemElem::Title(title)) = item_iter.next() {
45//! assert_eq!("Item Title 1", title.content());
46//! } else {
47//! panic!();
48//! }
49//! if let Some(ItemElem::Link(link)) = item_iter.next() {
50//! assert_eq!("https://example.com/1", link.content());
51//! } else {
52//! panic!();
53//! }
54//! if let Some(ItemElem::Description(desc)) = item_iter.next() {
55//! assert_eq!("Item Description 1", desc.content());
56//! } else {
57//! panic!();
58//! }
59//! assert_eq!(None, item_iter.next());
60//!
61//! assert_eq!(None, channel_iter.next());
62//! assert_eq!(None, rss_iter.next());
63//! assert_eq!(None, iter.next());
64//! ```
65//!
66//! ### Atom
67//!
68//! ```rust
69//! use readfeed::atom::{self, Elem, EntryElem, FeedElem};
70//!
71//! let input = r#"
72//! <feed xmlns="http://www.w3.org/2005/Atom">
73//! <title>Lorem ipsum dolor sit amet.</title>
74//! <link href="https://example.com/"/>
75//! <updated>2021-02-24T09:08:10Z</updated>
76//! <id>urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee</id>
77//! <entry>
78//! <title>Lorem ipsum dolor sit.</title>
79//! <link href="http://example.com/2021/02/24/hello"/>
80//! <id>urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373</id>
81//! <updated>2021-02-24T09:08:10Z</updated>
82//! <summary>Lorem ipsum dolor sit amet, consectetur adipiscing.</summary>
83//! </entry>
84//! </feed>
85//! "#;
86//!
87//! let mut iter = atom::Iter::new(input);
88//!
89//! let Some(Elem::Feed(mut feed_iter)) = iter.next() else {
90//! panic!();
91//! };
92//!
93//! if let Some(FeedElem::Title(title)) = feed_iter.next() {
94//! assert_eq!("Lorem ipsum dolor sit amet.", title.content());
95//! } else {
96//! panic!();
97//! }
98//!
99//! if let Some(FeedElem::Link(link)) = feed_iter.next() {
100//! assert_eq!(Some("https://example.com/"), link.href().map(|v| v.as_str()));
101//! } else {
102//! panic!();
103//! }
104//!
105//! if let Some(FeedElem::Updated(updated)) = feed_iter.next() {
106//! assert_eq!("2021-02-24T09:08:10Z", updated.content());
107//! } else {
108//! panic!();
109//! }
110//!
111//! if let Some(FeedElem::Id(id)) = feed_iter.next() {
112//! assert_eq!("urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee", id.content());
113//! } else {
114//! panic!();
115//! }
116//!
117//! if let Some(FeedElem::Entry(mut entry_iter)) = feed_iter.next() {
118//! if let Some(EntryElem::Title(title)) = entry_iter.next() {
119//! assert_eq!("Lorem ipsum dolor sit.", title.content());
120//! } else {
121//! panic!();
122//! }
123//! if let Some(EntryElem::Link(link)) = entry_iter.next() {
124//! assert_eq!(Some("http://example.com/2021/02/24/hello"), link.href().map(|v| v.as_str()));
125//! } else {
126//! panic!();
127//! }
128//! if let Some(EntryElem::Id(id)) = entry_iter.next() {
129//! assert_eq!("urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373", id.content());
130//! } else {
131//! panic!();
132//! }
133//! if let Some(EntryElem::Updated(updated)) = entry_iter.next() {
134//! assert_eq!("2021-02-24T09:08:10Z", updated.content());
135//! } else {
136//! panic!();
137//! }
138//! if let Some(EntryElem::Summary(summary)) = entry_iter.next() {
139//! assert_eq!("Lorem ipsum dolor sit amet, consectetur adipiscing.", summary.content());
140//! } else {
141//! panic!();
142//! }
143//! assert_eq!(None, entry_iter.next());
144//! } else {
145//! panic!();
146//! }
147//!
148//! assert_eq!(None, feed_iter.next());
149//! assert_eq!(None, iter.next());
150//! ```
151//!
152//! [rss]: https://www.rssboard.org/rss-specification
153//! [atom]: https://datatracker.ietf.org/doc/html/rfc4287
154
155#![cfg_attr(not(feature = "std"), no_std)]
156#![cfg_attr(docsrs, feature(doc_cfg))]
157#![warn(
158 missing_copy_implementations,
159 missing_debug_implementations,
160 rust_2018_idioms,
161 unused_lifetimes,
162 unused_qualifications
163)]
164
165use maybe_xml::token::{
166 prop::{AttributeValue, Attributes, TagName},
167 EmptyElementTag, StartTag,
168};
169
/// Type of document
///
/// This is the classification returned by [`detect_type`].
///
/// The derived ordering follows the declaration order of the variants, so the
/// variants must not be reordered.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Ty {
    /// An Atom feed.
    Atom,
    /// A JSON document (the input starts with `{` or `[`).
    Json,
    /// An RSS feed.
    Rss,
    /// The type could not be determined, e.g. the input is empty.
    Unknown,
    /// Markup that was not classified as a more specific type, such as an
    /// HTML page.
    XmlOrHtml,
}
179
180/// Attempt to detect the type of document.
181#[must_use]
182pub fn detect_type(input: &str) -> Ty {
183 input
184 .chars()
185 .filter(|c| !c.is_whitespace())
186 .map(|c| match c {
187 '{' | '[' => Ty::Json,
188 '<' => xml::find_ty(input),
189 _ => Ty::Unknown,
190 })
191 .next()
192 .unwrap_or(Ty::Unknown)
193}
194
195pub mod atom;
196pub mod html;
197pub mod opml;
198pub mod rss;
199pub mod xml;
200
201#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
202enum Tag<'a> {
203 Start(StartTag<'a>),
204 EmptyElement(EmptyElementTag<'a>),
205}
206
207impl<'a> Tag<'a> {
208 #[inline]
209 #[must_use]
210 const fn tag_name(&self) -> TagName<'a> {
211 match self {
212 Tag::Start(tag) => tag.name(),
213 Tag::EmptyElement(tag) => tag.name(),
214 }
215 }
216
217 #[inline]
218 #[must_use]
219 const fn attributes(&self) -> Option<Attributes<'a>> {
220 match self {
221 Tag::Start(tag) => tag.attributes(),
222 Tag::EmptyElement(tag) => tag.attributes(),
223 }
224 }
225
226 #[must_use]
227 fn find_attribute(&self, needle: &str) -> Option<AttributeValue<'a>> {
228 let mut pos = 0;
229 if let Some(attrs) = self.attributes() {
230 loop {
231 if let Some(attribute) = attrs.parse(pos) {
232 let name = attribute.name().as_str();
233 if name.eq_ignore_ascii_case(needle) {
234 return attribute.value();
235 }
236 pos += attribute.len();
237 } else {
238 return None;
239 }
240 }
241 } else {
242 None
243 }
244 }
245}
246
#[cfg(test)]
mod tests {
    use super::*;

    /// The Atom fixture is detected as an Atom feed.
    #[test]
    fn detect_type_atom() {
        assert_eq!(
            Ty::Atom,
            detect_type(include_str!("../tests/resources/atom-1.xml"))
        );
    }

    /// The RSS fixture is detected as an RSS feed.
    #[test]
    fn detect_type_rss() {
        assert_eq!(
            Ty::Rss,
            detect_type(include_str!("../tests/resources/rss-1.xml"))
        );
    }

    /// The HTML fixture is detected as generic markup.
    #[test]
    fn detect_type_html() {
        assert_eq!(
            Ty::XmlOrHtml,
            detect_type(include_str!("../tests/resources/html-1.html"))
        );
    }

    /// Empty input cannot be classified.
    #[test]
    fn detect_type_empty() {
        assert_eq!(Ty::Unknown, detect_type(""));
    }

    /// An opening brace is enough to classify the input as JSON.
    #[test]
    fn detect_type_json() {
        assert_eq!(Ty::Json, detect_type("{}"));
    }
}