readfeed/
lib.rs

1//! `ReadFeed` is a library to process feeds. It provides pull parsers for common feed
2//! formats such as [RSS][rss] and [Atom][atom].
3//!
4//! ## Examples
5//!
6//! ### RSS
7//!
8//! ```rust
9//! use readfeed::rss::{self, ChannelElem, Elem, ItemElem, RssElem};
10//!
11//! let input = "
12//! <rss>
13//!     <channel>
14//!         <title>Channel Title</title>
15//!         <item>
16//!             <title>Item Title 1</title>
17//!             <link>https://example.com/1</link>
18//!             <description>Item Description 1</description>
19//!         </item>
20//!     </channel>
21//! </rss>
22//! ";
23//!
24//! let mut iter = rss::Iter::new(input);
25//!
26//! let Some(Elem::Rss(mut rss_iter)) = iter.next() else {
27//!     panic!();
28//! };
29//!
30//! let Some(RssElem::Channel(mut channel_iter)) = rss_iter.next() else {
31//!     panic!();
32//! };
33//!
34//! if let Some(ChannelElem::Title(title)) = channel_iter.next() {
35//!     assert_eq!("Channel Title", title.content());
36//! } else {
37//!     panic!();
38//! }
39//!
40//! let Some(ChannelElem::Item(mut item_iter)) = channel_iter.next() else {
41//!     panic!();
42//! };
43//!
44//! if let Some(ItemElem::Title(title)) = item_iter.next() {
45//!     assert_eq!("Item Title 1", title.content());
46//! } else {
47//!     panic!();
48//! }
49//! if let Some(ItemElem::Link(link)) = item_iter.next() {
50//!     assert_eq!("https://example.com/1", link.content());
51//! } else {
52//!     panic!();
53//! }
54//! if let Some(ItemElem::Description(desc)) = item_iter.next() {
55//!     assert_eq!("Item Description 1", desc.content());
56//! } else {
57//!     panic!();
58//! }
59//! assert_eq!(None, item_iter.next());
60//!
61//! assert_eq!(None, channel_iter.next());
62//! assert_eq!(None, rss_iter.next());
63//! assert_eq!(None, iter.next());
64//! ```
65//!
66//! ### Atom
67//!
68//! ```rust
69//! use readfeed::atom::{self, Elem, EntryElem, FeedElem};
70//!
71//! let input = r#"
72//! <feed xmlns="http://www.w3.org/2005/Atom">
73//!     <title>Lorem ipsum dolor sit amet.</title>
74//!     <link href="https://example.com/"/>
75//!     <updated>2021-02-24T09:08:10Z</updated>
76//!     <id>urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee</id>
77//!     <entry>
78//!         <title>Lorem ipsum dolor sit.</title>
79//!         <link href="http://example.com/2021/02/24/hello"/>
80//!         <id>urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373</id>
81//!         <updated>2021-02-24T09:08:10Z</updated>
82//!         <summary>Lorem ipsum dolor sit amet, consectetur adipiscing.</summary>
83//!     </entry>
84//! </feed>
85//! "#;
86//!
87//! let mut iter = atom::Iter::new(input);
88//!
89//! let Some(Elem::Feed(mut feed_iter)) = iter.next() else {
90//!     panic!();
91//! };
92//!
93//! if let Some(FeedElem::Title(title)) = feed_iter.next() {
94//!     assert_eq!("Lorem ipsum dolor sit amet.", title.content());
95//! } else {
96//!     panic!();
97//! }
98//!
99//! if let Some(FeedElem::Link(link)) = feed_iter.next() {
100//!     assert_eq!(Some("https://example.com/"), link.href().map(|v| v.as_str()));
101//! } else {
102//!     panic!();
103//! }
104//!
105//! if let Some(FeedElem::Updated(updated)) = feed_iter.next() {
106//!     assert_eq!("2021-02-24T09:08:10Z", updated.content());
107//! } else {
108//!     panic!();
109//! }
110//!
111//! if let Some(FeedElem::Id(id)) = feed_iter.next() {
112//!     assert_eq!("urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee", id.content());
113//! } else {
114//!     panic!();
115//! }
116//!
117//! if let Some(FeedElem::Entry(mut entry_iter)) = feed_iter.next() {
118//!     if let Some(EntryElem::Title(title)) = entry_iter.next() {
119//!         assert_eq!("Lorem ipsum dolor sit.", title.content());
120//!     } else {
121//!         panic!();
122//!     }
123//!     if let Some(EntryElem::Link(link)) = entry_iter.next() {
124//!         assert_eq!(Some("http://example.com/2021/02/24/hello"), link.href().map(|v| v.as_str()));
125//!     } else {
126//!         panic!();
127//!     }
128//!     if let Some(EntryElem::Id(id)) = entry_iter.next() {
129//!         assert_eq!("urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373", id.content());
130//!     } else {
131//!         panic!();
132//!     }
133//!     if let Some(EntryElem::Updated(updated)) = entry_iter.next() {
134//!         assert_eq!("2021-02-24T09:08:10Z", updated.content());
135//!     } else {
136//!         panic!();
137//!     }
138//!     if let Some(EntryElem::Summary(summary)) = entry_iter.next() {
139//!         assert_eq!("Lorem ipsum dolor sit amet, consectetur adipiscing.", summary.content());
140//!     } else {
141//!         panic!();
142//!     }
143//!     assert_eq!(None, entry_iter.next());
144//! } else {
145//!     panic!();
146//! }
147//!
148//! assert_eq!(None, feed_iter.next());
149//! assert_eq!(None, iter.next());
150//! ```
151//!
152//! [rss]: https://www.rssboard.org/rss-specification
153//! [atom]: https://datatracker.ietf.org/doc/html/rfc4287
154
155#![cfg_attr(not(feature = "std"), no_std)]
156#![cfg_attr(docsrs, feature(doc_cfg))]
157#![warn(
158    missing_copy_implementations,
159    missing_debug_implementations,
160    rust_2018_idioms,
161    unused_lifetimes,
162    unused_qualifications
163)]
164
165use maybe_xml::token::{
166    prop::{AttributeValue, Attributes, TagName},
167    EmptyElementTag, StartTag,
168};
169
/// Kind of document, as reported by [`detect_type`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Ty {
    /// An Atom feed.
    Atom,
    /// A JSON document.
    Json,
    /// An RSS feed.
    Rss,
    /// The type could not be determined.
    Unknown,
    /// An XML or HTML document.
    XmlOrHtml,
}
179
180/// Attempt to detect the type of document.
181#[must_use]
182pub fn detect_type(input: &str) -> Ty {
183    input
184        .chars()
185        .filter(|c| !c.is_whitespace())
186        .map(|c| match c {
187            '{' | '[' => Ty::Json,
188            '<' => xml::find_ty(input),
189            _ => Ty::Unknown,
190        })
191        .next()
192        .unwrap_or(Ty::Unknown)
193}
194
195pub mod atom;
196pub mod html;
197pub mod opml;
198pub mod rss;
199pub mod xml;
200
201#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
202enum Tag<'a> {
203    Start(StartTag<'a>),
204    EmptyElement(EmptyElementTag<'a>),
205}
206
207impl<'a> Tag<'a> {
208    #[inline]
209    #[must_use]
210    const fn tag_name(&self) -> TagName<'a> {
211        match self {
212            Tag::Start(tag) => tag.name(),
213            Tag::EmptyElement(tag) => tag.name(),
214        }
215    }
216
217    #[inline]
218    #[must_use]
219    const fn attributes(&self) -> Option<Attributes<'a>> {
220        match self {
221            Tag::Start(tag) => tag.attributes(),
222            Tag::EmptyElement(tag) => tag.attributes(),
223        }
224    }
225
226    #[must_use]
227    fn find_attribute(&self, needle: &str) -> Option<AttributeValue<'a>> {
228        let mut pos = 0;
229        if let Some(attrs) = self.attributes() {
230            loop {
231                if let Some(attribute) = attrs.parse(pos) {
232                    let name = attribute.name().as_str();
233                    if name.eq_ignore_ascii_case(needle) {
234                        return attribute.value();
235                    }
236                    pos += attribute.len();
237                } else {
238                    return None;
239                }
240            }
241        } else {
242            None
243        }
244    }
245}
246
#[cfg(test)]
mod tests {
    use super::{detect_type, Ty};

    // Each case feeds one representative document to `detect_type` and checks
    // the reported kind.

    #[test]
    fn detect_type_atom() {
        assert_eq!(Ty::Atom, detect_type(include_str!("../tests/resources/atom-1.xml")));
    }

    #[test]
    fn detect_type_rss() {
        assert_eq!(Ty::Rss, detect_type(include_str!("../tests/resources/rss-1.xml")));
    }

    #[test]
    fn detect_type_html() {
        assert_eq!(Ty::XmlOrHtml, detect_type(include_str!("../tests/resources/html-1.html")));
    }

    #[test]
    fn detect_type_empty() {
        assert_eq!(Ty::Unknown, detect_type(""));
    }

    #[test]
    fn detect_type_json() {
        assert_eq!(Ty::Json, detect_type("{}"));
    }
}