readfeed/
atom.rs

//! [Atom Syndication Format][atom] is an XML-based syndication format.
2//!
3//! Use [`Iter`] as the starting type for parsing a feed.
4//!
5//! ## Examples
6//!
7//! ### Atom
8//!
9//! ```rust
10//! use readfeed::atom::{self, Elem, EntryElem, FeedElem};
11//!
12//! let input = r#"
13//! <feed xmlns="http://www.w3.org/2005/Atom">
14//!     <title>Lorem ipsum dolor sit amet.</title>
15//!     <link href="https://example.com/"/>
16//!     <updated>2021-02-24T09:08:10Z</updated>
17//!     <id>urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee</id>
18//!     <entry>
19//!         <title>Lorem ipsum dolor sit.</title>
20//!         <link href="http://example.com/2021/02/24/hello"/>
21//!         <id>urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373</id>
22//!         <updated>2021-02-24T09:08:10Z</updated>
23//!         <summary>Lorem ipsum dolor sit amet, consectetur adipiscing.</summary>
24//!     </entry>
25//! </feed>
26//! "#;
27//!
28//! let mut iter = atom::Iter::new(input);
29//!
30//! let Some(Elem::Feed(mut feed_iter)) = iter.next() else {
31//!     panic!();
32//! };
33//!
34//! if let Some(FeedElem::Title(title)) = feed_iter.next() {
35//!     assert_eq!("Lorem ipsum dolor sit amet.", title.content());
36//! } else {
37//!     panic!();
38//! }
39//!
40//! if let Some(FeedElem::Link(link)) = feed_iter.next() {
41//!     assert_eq!(Some("https://example.com/"), link.href().map(|v| v.as_str()));
42//! } else {
43//!     panic!();
44//! }
45//!
46//! if let Some(FeedElem::Updated(updated)) = feed_iter.next() {
47//!     assert_eq!("2021-02-24T09:08:10Z", updated.content());
48//! } else {
49//!     panic!();
50//! }
51//!
52//! if let Some(FeedElem::Id(id)) = feed_iter.next() {
53//!     assert_eq!("urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee", id.content());
54//! } else {
55//!     panic!();
56//! }
57//!
58//! if let Some(FeedElem::Entry(mut entry_iter)) = feed_iter.next() {
59//!     if let Some(EntryElem::Title(title)) = entry_iter.next() {
60//!         assert_eq!("Lorem ipsum dolor sit.", title.content());
61//!     } else {
62//!         panic!();
63//!     }
64//!     if let Some(EntryElem::Link(link)) = entry_iter.next() {
65//!         assert_eq!(Some("http://example.com/2021/02/24/hello"), link.href().map(|v| v.as_str()));
66//!     } else {
67//!         panic!();
68//!     }
69//!     if let Some(EntryElem::Id(id)) = entry_iter.next() {
70//!         assert_eq!("urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373", id.content());
71//!     } else {
72//!         panic!();
73//!     }
74//!     if let Some(EntryElem::Updated(updated)) = entry_iter.next() {
75//!         assert_eq!("2021-02-24T09:08:10Z", updated.content());
76//!     } else {
77//!         panic!();
78//!     }
79//!     if let Some(EntryElem::Summary(summary)) = entry_iter.next() {
80//!         assert_eq!("Lorem ipsum dolor sit amet, consectetur adipiscing.", summary.content());
81//!     } else {
82//!         panic!();
83//!     }
84//!     assert_eq!(None, entry_iter.next());
85//! } else {
86//!     panic!();
87//! }
88//!
89//! assert_eq!(None, feed_iter.next());
90//! assert_eq!(None, iter.next());
91//! ```
92//!
93//! [atom]: https://datatracker.ietf.org/doc/html/rfc4287
94
95use maybe_xml::{
96    token::{
97        self,
98        prop::{AttributeValue, Attributes, TagName},
99        Token,
100    },
101    Reader,
102};
103
104use crate::{xml, Tag};
105
// Declares one struct per listed name. Each struct stores the element's tag
// together with the text captured for the element, and exposes read-only
// accessors for both. A trailing comma in the name list is accepted.
macro_rules! content_elem {
    ($($name:ident),+ $(,)?) => {
        $(
            #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
            pub struct $name<'a> {
                tag: Tag<'a>,
                content: &'a str,
            }

            impl<'a> $name<'a> {
                /// Returns the text captured for this element.
                #[inline]
                #[must_use]
                pub const fn content(&self) -> &'a str {
                    self.content
                }

                /// Returns the attributes reported by this element's tag.
                #[inline]
                #[must_use]
                pub const fn attributes(&self) -> Option<Attributes<'a>> {
                    self.tag.attributes()
                }
            }
        )+
    };
}
133
// Adds an accessor method named `$method` to `$elem` which looks up the
// attribute called `$attr` on the element's tag.
macro_rules! impl_attr {
    ($elem:ident, $method:ident, $attr:literal) => {
        impl<'a> $elem<'a> {
            #[doc = concat!("Looks up the `", $attr, "` attribute on this element's tag.")]
            #[inline]
            #[must_use]
            pub fn $method(&self) -> Option<AttributeValue<'a>> {
                self.tag.find_attribute($attr)
            }
        }
    };
}
145
// Declares each listed name as a content element (see `content_elem!`) and
// gives it a `ty()` accessor for the `type` attribute. A trailing comma in
// the name list is accepted.
macro_rules! impl_text_construct {
    ($($name:ident),+ $(,)?) => {
        $(
            content_elem!($name);

            impl<'a> $name<'a> {
                /// Looks up the `type` attribute on this element's tag.
                #[inline]
                #[must_use]
                pub fn ty(&self) -> Option<AttributeValue<'a>> {
                    self.tag.find_attribute("type")
                }
            }
        )+
    };
}
163
// Declares each listed name as a plain content element. The date value is
// exposed as the unparsed text returned by `content()`.
macro_rules! impl_date_construct {
    ($($name:ident),+ $(,)?) => {
        $(
            content_elem!($name);
        )+
    };
}
173
// Declares each listed name as a plain content element. The URI value is
// exposed as the unparsed text returned by `content()`.
macro_rules! impl_uri_construct {
    ($($name:ident),+ $(,)?) => {
        $(
            content_elem!($name);
        )+
    };
}
183
// Generates the pull-style iterators used throughout this module.
//
// * `impl_iter!(with_tag Name, Elem, ctor)` declares the iterator struct
//   `Name` (tag + reader + read position), an `attributes()` accessor, and
//   the `Iterator` impl.
// * `impl_iter!(Name, Elem, ctor)` only adds the `Iterator` impl to an
//   already declared struct that has `reader` and `pos` fields.
//
// `ctor` is called as `ctor(tag, tag_name, content)` for every start tag or
// empty-element tag and must return the item type `Elem`.
macro_rules! impl_iter {
    (with_tag $iter_name:ident, $elem_ty:ident, $fn_name:expr) => {
        #[derive(Debug, Clone, PartialEq, Eq, Hash)]
        pub struct $iter_name<'a> {
            tag: Tag<'a>,
            reader: Reader<'a>,
            pos: usize,
        }

        impl<'a> $iter_name<'a> {
            /// Returns the attributes reported by the tag that opened this element.
            #[inline]
            #[must_use]
            pub const fn attributes(&self) -> Option<Attributes<'a>> {
                self.tag.attributes()
            }
        }

        impl_iter!($iter_name, $elem_ty, $fn_name);
    };
    ($iter_name:ident, $elem_ty:ident, $fn_name:expr) => {
        impl<'a> Iterator for $iter_name<'a> {
            type Item = $elem_ty<'a>;

            fn next(&mut self) -> Option<Self::Item> {
                loop {
                    // Running out of tokens ends the iteration.
                    let token = self.reader.tokenize(&mut self.pos)?;

                    match token.ty() {
                        token::Ty::StartTag(start) => {
                            let name = start.name();

                            // Capture everything up to the matching end tag
                            // so the constructor receives the element body.
                            let body = xml::collect_bytes_until_end_tag(
                                name,
                                &self.reader,
                                &mut self.pos,
                            );

                            return Some($fn_name(Tag::Start(start), name, body));
                        }
                        token::Ty::EmptyElementTag(empty) => {
                            let name = empty.name();

                            // An empty-element tag has no body.
                            return Some($fn_name(Tag::EmptyElement(empty), name, ""));
                        }
                        token::Ty::Characters(chars) => {
                            // Whitespace-only text is dropped; any other text
                            // is handed back untouched.
                            if !chars.content().as_str().trim().is_empty() {
                                return Some($elem_ty::Raw(token));
                            }
                        }
                        // These tokens are not interpreted here; they are
                        // passed through to the caller as raw tokens.
                        token::Ty::EndTag(_)
                        | token::Ty::ProcessingInstruction(_)
                        | token::Ty::Declaration(_)
                        | token::Ty::Comment(_)
                        | token::Ty::Cdata(_) => {
                            return Some($elem_ty::Raw(token));
                        }
                    }
                }
            }
        }
    };
}
248
// Catch-all element used by every `*Elem::new` constructor in this module
// when the tag's local name is not one of the names it recognizes.
content_elem!(Unknown);

impl<'a> Unknown<'a> {
    /// Returns the name of the tag that produced this element.
    #[inline]
    #[must_use]
    pub fn tag_name(&self) -> TagName<'a> {
        self.tag.tag_name()
    }
}
258
259content_elem!(Link);
260impl_attr!(Link, href, "href");
261impl_attr!(Link, rel, "rel");
262impl_attr!(Link, ty, "type");
263impl_attr!(Link, hreflang, "hreflang");
264impl_attr!(Link, title, "title");
265impl_attr!(Link, length, "length");
266
267content_elem!(Category);
268impl_attr!(Category, term, "term");
269impl_attr!(Category, scheme, "scheme");
270impl_attr!(Category, label, "label");
271
272content_elem!(Content);
273impl_attr!(Content, ty, "type");
274impl_attr!(Content, src, "src");
275
276impl_text_construct!(Generator);
277impl_attr!(Generator, uri, "uri");
278impl_attr!(Generator, version, "version");
279
280impl_uri_construct!(Icon, Id, Logo);
281
282impl_date_construct!(Published, Updated);
283
284impl_text_construct!(Rights, Subtitle, Summary, Title);
285
286content_elem!(PersonName, PersonEmail);
287
288impl_uri_construct!(PersonUri);
289
290#[derive(Debug, PartialEq, Eq, Hash)]
291pub enum PersonElem<'a> {
292    Email(PersonEmail<'a>),
293    Name(PersonName<'a>),
294    Uri(PersonUri<'a>),
295    Unknown(Unknown<'a>),
296    Raw(Token<'a>),
297}
298
299#[derive(Debug, PartialEq, Eq, Hash)]
300pub enum SourceElem<'a> {
301    Author(PersonIter<'a>),
302    Category(Category<'a>),
303    Contributor(PersonIter<'a>),
304    Generator(Generator<'a>),
305    Icon(Icon<'a>),
306    Id(Id<'a>),
307    Link(Link<'a>),
308    Logo(Logo<'a>),
309    Rights(Rights<'a>),
310    Subtitle(Subtitle<'a>),
311    Title(Title<'a>),
312    Updated(Updated<'a>),
313    Unknown(Unknown<'a>),
314    Raw(Token<'a>),
315}
316
317#[derive(Debug, PartialEq, Eq, Hash)]
318pub enum EntryElem<'a> {
319    Author(PersonIter<'a>),
320    Category(Category<'a>),
321    Content(Content<'a>),
322    Contributor(PersonIter<'a>),
323    Id(Id<'a>),
324    Link(Link<'a>),
325    Published(Published<'a>),
326    Rights(Rights<'a>),
327    Source(SourceIter<'a>),
328    Summary(Summary<'a>),
329    Title(Title<'a>),
330    Updated(Updated<'a>),
331    Unknown(Unknown<'a>),
332    Raw(Token<'a>),
333}
334
335#[derive(Debug, PartialEq, Eq, Hash)]
336pub enum FeedElem<'a> {
337    Author(PersonIter<'a>),
338    Category(Category<'a>),
339    Contributor(PersonIter<'a>),
340    Generator(Generator<'a>),
341    Icon(Icon<'a>),
342    Id(Id<'a>),
343    Link(Link<'a>),
344    Logo(Logo<'a>),
345    Rights(Rights<'a>),
346    Subtitle(Subtitle<'a>),
347    Title(Title<'a>),
348    Updated(Updated<'a>),
349    Entry(EntryIter<'a>),
350    Unknown(Unknown<'a>),
351    Raw(Token<'a>),
352}
353
/// A top-level item yielded by [`Iter`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Elem<'a> {
    /// A `feed` element; iterate it to read the feed's children.
    Feed(FeedIter<'a>),
    /// An element whose local name is not `feed`.
    Unknown(Unknown<'a>),
    /// Any token that is not the start of an element (for example a
    /// processing instruction, comment, or non-whitespace text), passed
    /// through unchanged.
    Raw(Token<'a>),
}
360
361impl<'a> PersonElem<'a> {
362    fn new(tag: Tag<'a>, tag_name: TagName<'a>, content: &'a str) -> PersonElem<'a> {
363        let local_name = tag_name.local().as_str();
364
365        macro_rules! return_content_with_tag {
366            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
367                if local_name.eq_ignore_ascii_case($local_name) {
368                    return $elem_ty($inner_ty { tag, content });
369                }
370            };
371        }
372
373        return_content_with_tag!("name", PersonName, PersonElem::Name);
374        return_content_with_tag!("uri", PersonUri, PersonElem::Uri);
375        return_content_with_tag!("email", PersonEmail, PersonElem::Email);
376
377        PersonElem::Unknown(Unknown { tag, content })
378    }
379}
380
381impl<'a> SourceElem<'a> {
382    fn new(tag: Tag<'a>, tag_name: TagName<'a>, content: &'a str) -> SourceElem<'a> {
383        let local_name = tag_name.local().as_str();
384
385        macro_rules! return_content_with_tag {
386            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
387                if local_name.eq_ignore_ascii_case($local_name) {
388                    return $elem_ty($inner_ty { tag, content });
389                }
390            };
391        }
392
393        macro_rules! return_iter {
394            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
395                if local_name.eq_ignore_ascii_case($local_name) {
396                    return $elem_ty($inner_ty {
397                        tag,
398                        reader: Reader::from_str(content),
399                        pos: 0,
400                    });
401                }
402            };
403        }
404
405        return_iter!("author", PersonIter, SourceElem::Author);
406
407        return_content_with_tag!("category", Category, SourceElem::Category);
408
409        return_iter!("contributor", PersonIter, SourceElem::Contributor);
410
411        return_content_with_tag!("generator", Generator, SourceElem::Generator);
412        return_content_with_tag!("icon", Icon, SourceElem::Icon);
413        return_content_with_tag!("id", Id, SourceElem::Id);
414        return_content_with_tag!("link", Link, SourceElem::Link);
415        return_content_with_tag!("logo", Logo, SourceElem::Logo);
416        return_content_with_tag!("rights", Rights, SourceElem::Rights);
417        return_content_with_tag!("subtitle", Subtitle, SourceElem::Subtitle);
418        return_content_with_tag!("title", Title, SourceElem::Title);
419        return_content_with_tag!("updated", Updated, SourceElem::Updated);
420
421        SourceElem::Unknown(Unknown { tag, content })
422    }
423}
424
425impl<'a> EntryElem<'a> {
426    fn new(tag: Tag<'a>, tag_name: TagName<'a>, content: &'a str) -> EntryElem<'a> {
427        let local_name = tag_name.local().as_str();
428
429        macro_rules! return_content_with_tag {
430            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
431                if local_name.eq_ignore_ascii_case($local_name) {
432                    return $elem_ty($inner_ty { tag, content });
433                }
434            };
435        }
436
437        macro_rules! return_iter {
438            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
439                if local_name.eq_ignore_ascii_case($local_name) {
440                    return $elem_ty($inner_ty {
441                        tag,
442                        reader: Reader::from_str(content),
443                        pos: 0,
444                    });
445                }
446            };
447        }
448
449        return_iter!("author", PersonIter, EntryElem::Author);
450
451        return_content_with_tag!("category", Category, EntryElem::Category);
452        return_content_with_tag!("content", Content, EntryElem::Content);
453
454        return_iter!("contributor", PersonIter, EntryElem::Contributor);
455
456        return_content_with_tag!("id", Id, EntryElem::Id);
457        return_content_with_tag!("link", Link, EntryElem::Link);
458        return_content_with_tag!("published", Published, EntryElem::Published);
459        return_content_with_tag!("rights", Rights, EntryElem::Rights);
460
461        return_iter!("source", SourceIter, EntryElem::Source);
462
463        return_content_with_tag!("summary", Summary, EntryElem::Summary);
464        return_content_with_tag!("title", Title, EntryElem::Title);
465        return_content_with_tag!("updated", Updated, EntryElem::Updated);
466
467        EntryElem::Unknown(Unknown { tag, content })
468    }
469}
470
471impl<'a> FeedElem<'a> {
472    fn new(tag: Tag<'a>, tag_name: TagName<'a>, content: &'a str) -> FeedElem<'a> {
473        let local_name = tag_name.local().as_str();
474
475        macro_rules! return_content_with_tag {
476            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
477                if local_name.eq_ignore_ascii_case($local_name) {
478                    return $elem_ty($inner_ty { tag, content });
479                }
480            };
481        }
482
483        macro_rules! return_iter {
484            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
485                if local_name.eq_ignore_ascii_case($local_name) {
486                    return $elem_ty($inner_ty {
487                        tag,
488                        reader: Reader::from_str(content),
489                        pos: 0,
490                    });
491                }
492            };
493        }
494
495        return_iter!("entry", EntryIter, FeedElem::Entry);
496
497        return_iter!("author", PersonIter, FeedElem::Author);
498
499        return_content_with_tag!("category", Category, FeedElem::Category);
500
501        return_iter!("contributor", PersonIter, FeedElem::Contributor);
502
503        return_content_with_tag!("generator", Generator, FeedElem::Generator);
504        return_content_with_tag!("icon", Icon, FeedElem::Icon);
505        return_content_with_tag!("id", Id, FeedElem::Id);
506        return_content_with_tag!("link", Link, FeedElem::Link);
507        return_content_with_tag!("logo", Logo, FeedElem::Logo);
508        return_content_with_tag!("rights", Rights, FeedElem::Rights);
509        return_content_with_tag!("subtitle", Subtitle, FeedElem::Subtitle);
510        return_content_with_tag!("title", Title, FeedElem::Title);
511        return_content_with_tag!("updated", Updated, FeedElem::Updated);
512
513        FeedElem::Unknown(Unknown { tag, content })
514    }
515}
516
517impl<'a> Elem<'a> {
518    fn new(tag: Tag<'a>, tag_name: TagName<'a>, content: &'a str) -> Elem<'a> {
519        let local_name = tag_name.local().as_str();
520
521        macro_rules! return_iter {
522            ($local_name: literal, $inner_ty: ident, $elem_ty: expr) => {
523                if local_name.eq_ignore_ascii_case($local_name) {
524                    return $elem_ty($inner_ty {
525                        tag,
526                        reader: Reader::from_str(content),
527                        pos: 0,
528                    });
529                }
530            };
531        }
532
533        return_iter!("feed", FeedIter, Elem::Feed);
534
535        Elem::Unknown(Unknown { tag, content })
536    }
537}
538
// Nested iterators: each invocation declares the iterator struct, its
// `attributes()` accessor, and an `Iterator` impl whose items are built by
// the matching `*Elem::new` constructor.
impl_iter!(with_tag PersonIter, PersonElem, PersonElem::new);
impl_iter!(with_tag SourceIter, SourceElem, SourceElem::new);
impl_iter!(with_tag EntryIter, EntryElem, EntryElem::new);
impl_iter!(with_tag FeedIter, FeedElem, FeedElem::new);
543
544#[derive(Debug)]
545pub struct Iter<'a> {
546    reader: Reader<'a>,
547    pos: usize,
548}
549
550impl<'a> Iter<'a> {
551    #[inline]
552    #[must_use]
553    pub fn new(input: &'a str) -> Self {
554        Self {
555            reader: Reader::from_str(input),
556            pos: 0,
557        }
558    }
559}
560
561impl_iter!(Iter, Elem, Elem::new);
562
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks the `atom-1.xml` fixture item by item and checks every element
    /// in document order.
    // One long linear walk is easier to compare against the fixture than
    // several small helpers, hence the lint exemption.
    #[allow(clippy::too_many_lines)]
    #[test]
    fn eval_atom_1() {
        let input = include_str!("../tests/resources/atom-1.xml");

        let mut iter = Iter::new(input);

        // The XML declaration is passed through as a raw token.
        let Some(Elem::Raw(token)) = iter.next() else {
            panic!();
        };
        if let token::Ty::ProcessingInstruction(pi) = token.ty() {
            assert_eq!(r#"<?xml version="1.0" encoding="utf-8"?>"#, pi.as_str());
        } else {
            panic!();
        }

        let Some(Elem::Feed(mut feed_iter)) = iter.next() else {
            panic!();
        };

        if let Some(FeedElem::Title(title)) = feed_iter.next() {
            assert_eq!("Lorem ipsum dolor sit amet.", title.content());
        } else {
            panic!();
        }
        if let Some(FeedElem::Link(link)) = feed_iter.next() {
            assert_eq!(
                Some("https://example.com/"),
                link.href().map(|v| v.as_str())
            );
        } else {
            panic!();
        }
        if let Some(FeedElem::Updated(updated)) = feed_iter.next() {
            assert_eq!("2021-02-24T09:08:10Z", updated.content());
        } else {
            panic!();
        }

        if let Some(FeedElem::Author(mut person_iter)) = feed_iter.next() {
            // Fail loudly when the first child is not `name`; without the
            // `else` branch the name assertion would be skipped silently.
            if let Some(PersonElem::Name(name)) = person_iter.next() {
                assert_eq!("Jane Doe", name.content());
            } else {
                panic!();
            }
            assert_eq!(None, person_iter.next());
        } else {
            panic!()
        }

        if let Some(FeedElem::Id(id)) = feed_iter.next() {
            assert_eq!(
                "urn:uuid:ba9192e8-9e34-4c23-8445-94b67ba316ee",
                id.content()
            );
        } else {
            panic!()
        }

        if let Some(FeedElem::Entry(mut entry_iter)) = feed_iter.next() {
            if let Some(EntryElem::Title(title)) = entry_iter.next() {
                assert_eq!("Lorem ipsum dolor sit.", title.content());
            } else {
                panic!();
            }
            if let Some(EntryElem::Link(link)) = entry_iter.next() {
                assert_eq!(
                    Some("http://example.com/2021/02/24/hello"),
                    link.href().map(|v| v.as_str())
                );
            } else {
                panic!();
            }
            if let Some(EntryElem::Id(id)) = entry_iter.next() {
                assert_eq!(
                    "urn:uuid:425ba23c-d283-4580-8a3c-3b67aaa6b373",
                    id.content()
                );
            } else {
                panic!()
            }
            if let Some(EntryElem::Updated(updated)) = entry_iter.next() {
                assert_eq!("2021-02-24T09:08:10Z", updated.content());
            } else {
                panic!();
            }
            if let Some(EntryElem::Summary(summary)) = entry_iter.next() {
                assert_eq!(
                    "Lorem ipsum dolor sit amet, consectetur adipiscing.",
                    summary.content()
                );
            } else {
                panic!()
            }
            assert_eq!(None, entry_iter.next());
        } else {
            panic!()
        }

        assert_eq!(None, feed_iter.next());
    }
}
666}