rss_gen/
parser.rs

1// Copyright © 2024 RSS Gen. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! A robust and flexible RSS feed parser.
5//!
6//! This module provides functionality to parse RSS feeds of various versions
7//! (0.90, 0.91, 0.92, 1.0, and 2.0) into a structured format. It offers
8//! comprehensive error handling, extensive customization options, and follows
9//! best practices in Rust development.
10//!
11//! # Features
12//!
13//! - Supports RSS versions 0.90, 0.91, 0.92, 1.0, and 2.0
14//! - Robust error handling with custom error types
15//! - Extensible parsing with custom element handlers
16//! - Comprehensive test suite
17//! - Thread-safe and memory-efficient implementation
18//!
19//! # Examples
20//!
21//! ```rust
22//! use rss_gen::parse_rss;
23//!
24//! let xml_content = r#"
25//!     <?xml version="1.0" encoding="UTF-8"?>
26//!     <rss version="2.0">
27//!         <channel>
28//!             <title>My Blog</title>
29//!             <link>https://example.com</link>
30//!             <description>A sample blog</description>
31//!             <item>
32//!                 <title>First Post</title>
33//!                 <link>https://example.com/first-post</link>
34//!                 <description>This is my first post</description>
35//!             </item>
36//!         </channel>
37//!     </rss>
38//! "#;
39//!
40//! let parsed_data = parse_rss(xml_content, None).unwrap();
41//! assert_eq!(parsed_data.title, "My Blog");
42//! assert_eq!(parsed_data.items.len(), 1);
43//! ```
44
45use quick_xml::events::{
46    BytesCData, BytesEnd, BytesStart, BytesText, Event,
47};
48use quick_xml::Reader;
49use std::borrow::Cow;
50use std::sync::Arc;
51
52pub use crate::data::{RssData, RssItem, RssVersion};
53pub use crate::error::{Result, RssError};
54
/// Extension point for application-specific handling of RSS elements.
///
/// Handlers are registered through [`ParserConfig::custom_handlers`]. While a
/// feed is parsed, every registered handler is invoked once for each text and
/// CDATA event, in registration order. The `Send + Sync` bounds allow a
/// handler to be stored behind an `Arc` and shared between threads.
pub trait ElementHandler: Send + Sync {
    /// Handles the text content of a single RSS element.
    ///
    /// # Arguments
    ///
    /// * `name` - The name of the most recently opened element. It is empty
    ///   when the text does not directly follow a start tag (for example the
    ///   whitespace between two elements).
    /// * `text` - The text or CDATA content that was read.
    /// * `attributes` - The `(name, value)` attribute pairs captured from the
    ///   element's start tag.
    ///
    /// # Returns
    ///
    /// `Ok(())` when the element was handled (or deliberately ignored).
    ///
    /// # Errors
    ///
    /// Implementations return an `Err` when the element cannot be processed,
    /// e.g. because of invalid attributes, an unexpected element name, or a
    /// failure in the custom parsing logic. The parser propagates the first
    /// such error to the caller, which aborts parsing.
    fn handle_element(
        &self,
        name: &str,
        text: &str,
        attributes: &[(String, String)],
    ) -> Result<()>;
}
89
/// Configuration options for the RSS parser.
///
/// Currently the only option is a list of custom element handlers. The
/// derived `Default` produces a configuration without any handlers.
#[derive(Default)]
pub struct ParserConfig {
    /// Custom handlers that are run for every text and CDATA event.
    ///
    /// Each handler implements [`ElementHandler`] and is wrapped in an `Arc`
    /// so that the same handler instance can be shared between several
    /// configurations and threads. Handlers run in the order they appear in
    /// this vector.
    pub custom_handlers: Vec<Arc<dyn ElementHandler>>,
}
102
103/// Parses a channel element and sets the corresponding field in `RssData`.
104///
105/// This function processes elements found within the `channel` tag of an RSS feed
106/// and assigns the appropriate values to the `RssData` struct.
107///
108/// # Arguments
109///
110/// * `rss_data` - A mutable reference to the `RssData` struct.
111/// * `element` - The name of the channel element.
112/// * `text` - The text content of the channel element.
113/// * `is_rss_1_0` - A boolean indicating if the feed is RSS 1.0.
114fn parse_channel_element(
115    rss_data: &mut RssData,
116    element: &str,
117    text: &str,
118    is_rss_1_0: bool,
119) -> Result<()> {
120    match element {
121        "title" => {
122            rss_data.title = text.to_string();
123            Ok(())
124        }
125        "link" => {
126            rss_data.link = text.to_string();
127            Ok(())
128        }
129        "description" => {
130            rss_data.description = text.to_string();
131            Ok(())
132        }
133        "language" => {
134            rss_data.language = text.to_string();
135            Ok(())
136        }
137        "copyright" => {
138            rss_data.copyright = text.to_string();
139            Ok(())
140        }
141        "managingEditor" => {
142            rss_data.managing_editor = text.to_string();
143            Ok(())
144        }
145        "webMaster" => {
146            rss_data.webmaster = text.to_string();
147            Ok(())
148        }
149        "pubDate" => {
150            rss_data.pub_date = text.to_string();
151            Ok(())
152        }
153        "lastBuildDate" => {
154            rss_data.last_build_date = text.to_string();
155            Ok(())
156        }
157        "category" => {
158            rss_data.category = text.to_string();
159            Ok(())
160        }
161        "generator" => {
162            rss_data.generator = text.to_string();
163            Ok(())
164        }
165        "docs" => {
166            rss_data.docs = text.to_string();
167            Ok(())
168        }
169        "ttl" => {
170            rss_data.ttl = text.to_string();
171            Ok(())
172        }
173        // Handle RSS 1.0 specific elements
174        "items" => {
175            if is_rss_1_0 {
176                Ok(())
177            } else {
178                Err(RssError::UnknownElement("items".into()))
179            }
180        }
181        "rdf:Seq" => {
182            if is_rss_1_0 {
183                Ok(())
184            } else {
185                Err(RssError::UnknownElement("rdf:Seq".into()))
186            }
187        }
188        "rdf:li" => {
189            if is_rss_1_0 {
190                Ok(())
191            } else {
192                Err(RssError::UnknownElement("rdf:li".into()))
193            }
194        }
195        _ => Err(RssError::UnknownElement(format!(
196            "Unknown channel element: {}",
197            element
198        ))),
199    }
200}
201
202/// Parses an item element and sets the corresponding field in `RssItem`.
203///
204/// This function processes elements found within the `item` tag of an RSS feed
205/// and assigns the appropriate values to the `RssItem` struct.
206///
207/// # Arguments
208///
209/// * `item` - A mutable reference to the `RssItem` struct.
210/// * `element` - The name of the item element.
211/// * `text` - The text content of the item element.
212/// * `attributes` - A slice containing the element's attributes as key-value pairs.
213fn parse_item_element(
214    item: &mut RssItem,
215    element: &str,
216    text: &str,
217    attributes: &[(String, String)],
218) {
219    match element {
220        "title" => {
221            item.title = text.to_string();
222        }
223        "link" => {
224            item.link = text.to_string();
225        }
226        "description" => {
227            item.description = text.to_string();
228        }
229        "author" => {
230            item.author = text.to_string();
231        }
232        "guid" => {
233            item.guid = text.to_string();
234        }
235        "pubDate" => {
236            item.pub_date = text.to_string();
237        }
238        "category" => {
239            item.category = Some(text.to_string());
240        }
241        "comments" => {
242            item.comments = Some(text.to_string());
243        }
244        "enclosure" => {
245            if attributes.is_empty() {
246                item.enclosure = None;
247            } else {
248                let enclosure_str = attributes
249                    .iter()
250                    .map(|(k, v)| format!("{}=\"{}\"", k, v))
251                    .collect::<Vec<String>>()
252                    .join(" ");
253                item.enclosure = Some(enclosure_str);
254            }
255        }
256        "source" => {
257            item.source = Some(text.to_string());
258        }
259        _ => (), // Ignore unknown elements
260    }
261}
262
/// The section of the feed the parser is currently positioned in.
///
/// The state decides which target (channel data, current item, or image
/// data) receives the text of the element being read.
#[derive(Clone)]
enum ParsingState {
    /// Inside a `<channel>` element.
    Channel,
    /// Inside an `<item>` element.
    Item,
    /// Inside an `<image>` element.
    Image,
    /// Not inside a channel, item, or image.
    None,
}
271
/// A borrowed snapshot of the parser state for a single text or CDATA event.
///
/// It is assembled by the text/CDATA event processors and handed to
/// `handle_text_event`, which uses it to decide where the text belongs.
struct ParsingContext<'a> {
    /// Whether the feed is treated as RSS 1.0 (forwarded to the channel
    /// element parser).
    is_rss_1_0: bool,
    /// The section of the feed the parser is currently in.
    state: ParsingState,
    /// Name of the element the text belongs to; when empty, the text is
    /// not assigned to anything.
    current_element: &'a str,
    /// The text (or CDATA) content that was read.
    text: &'a str,
    /// Attributes captured from the current element's start tag.
    current_attributes: &'a [(String, String)],
}
280
281impl<'a> ParsingContext<'a> {
282    /// Helper function to check if the current state is in a channel.
283    pub fn in_channel(&self) -> bool {
284        matches!(self.state, ParsingState::Channel)
285    }
286
287    /// Helper function to check if the current state is in an item.
288    pub fn in_item(&self) -> bool {
289        matches!(self.state, ParsingState::Item)
290    }
291
292    /// Helper function to check if the current state is in an image.
293    pub fn in_image(&self) -> bool {
294        matches!(self.state, ParsingState::Image)
295    }
296}
297
/// Scratch storage for the children of an `<image>` element.
///
/// `handle_text_event` fills the fields while the parser is inside an
/// image; the callers copy the values back into the parser context.
struct ImageData {
    /// Text of the image's `<title>` child.
    title: String,
    /// Text of the image's `<url>` child.
    url: String,
    /// Text of the image's `<link>` child.
    link: String,
}
304
305/// Handles text events for both regular text and CDATA in RSS feeds.
306///
307/// This function processes both text and CDATA events, parsing the content
308/// and assigning values to either channel, item, or image elements in the feed.
309///
310/// # Arguments
311///
312/// * `rss_data` - A mutable reference to the `RssData` struct representing the RSS feed being processed.
313/// * `context` - A `ParsingContext` struct containing details about the current state of the parser (e.g., whether it's within a channel, item, or image, and the element being processed).
314/// * `current_item` - A mutable reference to the `RssItem` struct, representing the current item being parsed in the RSS feed.
315/// * `image_data` - A mutable reference to an `ImageData` struct for storing the parsed `title`, `url`, and `link` of the image element if applicable.
316///
317/// # Returns
318///
319/// A `Result` indicating the success or failure of handling the text event.
320fn handle_text_event(
321    rss_data: &mut RssData,
322    context: &ParsingContext,
323    current_item: &mut RssItem,
324    image_data: &mut ImageData,
325) -> Result<()> {
326    if context.in_channel() && !context.in_item() && !context.in_image()
327    {
328        if !context.current_element.is_empty() {
329            parse_channel_element(
330                rss_data,
331                context.current_element,
332                &Cow::Owned(context.text.to_string()),
333                context.is_rss_1_0,
334            )?;
335        }
336    } else if context.in_item() && !context.current_element.is_empty() {
337        parse_item_element(
338            current_item,
339            context.current_element,
340            context.text,
341            context.current_attributes,
342        );
343    } else if context.in_image() && !context.current_element.is_empty()
344    {
345        match context.current_element {
346            "title" => image_data.title = context.text.to_string(),
347            "url" => image_data.url = context.text.to_string(),
348            "link" => image_data.link = context.text.to_string(),
349            _ => (),
350        }
351    }
352    Ok(())
353}
354
355/// Parses an RSS feed from XML content.
356///
357/// This function takes XML content as input and parses it into an `RssData` struct.
358/// It supports parsing RSS versions 0.90, 0.91, 0.92, 1.0, and 2.0.
359///
360/// # Arguments
361///
362/// * `xml_content` - A string slice containing the XML content of the RSS feed.
363/// * `config` - Optional configuration for custom parsing behavior.
364///
365/// # Returns
366///
367/// * `Ok(RssData)` - The parsed RSS data if successful.
368/// * `Err(RssError)` - An error if parsing fails.
369///
370/// # Errors
371///
372/// This function returns an `Err(RssError)` in the following cases:
373///
374/// - If the XML content is invalid or malformed, a `RssError::XmlParseError` is returned.
375/// - If an unsupported or invalid RSS version is encountered, a `RssError::InvalidInput` is returned.
376/// - If an unknown or unsupported element is encountered during parsing, a `RssError::UnknownElement` is returned.
377pub fn parse_rss(
378    xml_content: &str,
379    config: Option<&ParserConfig>,
380) -> Result<RssData> {
381    let mut reader = Reader::from_str(xml_content);
382    let mut rss_data = RssData::new(None);
383    let mut buf = Vec::with_capacity(1024);
384    let mut context = ParserContext::new();
385
386    loop {
387        match reader.read_event_into(&mut buf) {
388            Ok(Event::Start(ref e)) => {
389                process_start_event(e, &mut context, &mut rss_data)?;
390            }
391            Ok(Event::End(ref e)) => {
392                process_end_event(e, &mut context, &mut rss_data);
393            }
394            Ok(Event::Text(ref e)) => process_text_event(
395                e,
396                &mut context,
397                &mut rss_data,
398                config,
399            )?,
400            Ok(Event::CData(ref e)) => process_cdata_event(
401                e,
402                &mut context,
403                &mut rss_data,
404                config,
405            )?,
406            Ok(Event::Eof) => break Ok(rss_data),
407            Err(e) => return Err(RssError::XmlParseError(e)),
408            _ => (),
409        }
410        buf.clear();
411    }
412}
413
414/// Processes the start event of an XML element during RSS feed parsing.
415///
416/// This function handles the start of an XML element in an RSS feed, determining the RSS version,
417/// handling different element types (e.g., "channel", "item", "image"), and extracting attributes
418/// from the element.
419///
420/// # Arguments
421///
422/// * `e` - A reference to the `BytesStart` struct representing the start of an XML element.
423/// * `context` - A mutable reference to the `ParserContext` struct, which maintains the current parsing state.
424/// * `rss_data` - A mutable reference to the `RssData` struct, which stores the parsed RSS data.
425fn process_start_event(
426    e: &BytesStart<'_>,
427    context: &mut ParserContext,
428    _rss_data: &mut RssData,
429) -> Result<()> {
430    let name_str = String::from_utf8_lossy(e.name().0).into_owned();
431    if name_str.is_empty() {
432        return Ok(());
433    }
434
435    // Detect RSS version or RDF for RSS 1.0
436    match name_str.as_str() {
437        "rss" | "rdf:RDF" => {
438            // Skip root elements like <rss> or <rdf:RDF>, continue to parse children
439            return Ok(());
440        }
441        "channel" => {
442            // Correctly handle the `channel` element inside the RSS root
443            context.parsing_state = ParsingState::Channel;
444            return Ok(());
445        }
446        "item" => {
447            context.parsing_state = ParsingState::Item;
448            context.current_item = RssItem::new();
449        }
450        "image" => {
451            context.parsing_state = ParsingState::Image;
452        }
453        _ => {
454            // Only return an error for truly unknown elements, ignoring root elements
455            if !matches!(
456                context.parsing_state,
457                ParsingState::Item
458                    | ParsingState::Channel
459                    | ParsingState::Image
460            ) {
461                return Err(RssError::UnknownElement(format!(
462                    "Unknown element: {}",
463                    name_str
464                )));
465            }
466        }
467    }
468
469    // Store current element and attributes
470    context.current_element = name_str;
471    context.current_attributes = e
472        .attributes()
473        .filter_map(std::result::Result::ok)
474        .map(|a| {
475            (
476                String::from_utf8_lossy(a.key.0).into_owned(),
477                String::from_utf8_lossy(&a.value).into_owned(),
478            )
479        })
480        .collect();
481
482    Ok(())
483}
484
485/// Processes the end event of an XML element during RSS feed parsing.
486///
487/// This function handles the end of an XML element in an RSS feed, updating the parsing state
488/// based on the element type (e.g., "channel", "item", "image").
489///
490/// # Arguments
491///
492/// * `e` - A reference to the `BytesEnd` struct representing the end of an XML element.
493/// * `context` - A mutable reference to the `ParserContext` struct, which maintains the current parsing state.
494/// * `rss_data` - A mutable reference to the `RssData` struct, which stores the parsed RSS data.
495fn process_end_event(
496    e: &BytesEnd<'_>,
497    context: &mut ParserContext,
498    rss_data: &mut RssData,
499) {
500    let name = e.name().0.to_vec();
501    if name == b"channel" {
502        if matches!(context.parsing_state, ParsingState::Channel) {
503            context.parsing_state = ParsingState::None;
504        }
505    } else if name == b"item" {
506        if matches!(context.parsing_state, ParsingState::Item) {
507            context.parsing_state = ParsingState::None;
508            rss_data.add_item(context.current_item.clone());
509        }
510    } else if name == b"image"
511        && matches!(context.parsing_state, ParsingState::Image)
512    {
513        context.parsing_state = ParsingState::None;
514        rss_data.set_image(
515            &context.image_title.clone(),
516            &context.image_url.clone(),
517            &context.image_link.clone(),
518        );
519    }
520    context.current_element.clear();
521    context.current_attributes.clear();
522}
523
524fn process_text_event(
525    e: &BytesText<'_>,
526    context: &mut ParserContext,
527    rss_data: &mut RssData,
528    config: Option<&ParserConfig>,
529) -> Result<()> {
530    let text = e.unescape()?.into_owned();
531
532    let parse_context = ParsingContext {
533        is_rss_1_0: matches!(
534            context.rss_version,
535            RssVersionState::Rss1_0
536        ),
537        state: context.parsing_state.clone(),
538        current_element: &context.current_element,
539        text: &text,
540        current_attributes: &context.current_attributes,
541    };
542
543    let mut image_data = ImageData {
544        title: context.image_title.clone(),
545        url: context.image_url.clone(),
546        link: context.image_link.clone(),
547    };
548
549    handle_text_event(
550        rss_data,
551        &parse_context,
552        &mut context.current_item,
553        &mut image_data,
554    )?;
555
556    context.image_title = image_data.title;
557    context.image_url = image_data.url;
558    context.image_link = image_data.link;
559
560    // Custom handlers can be applied if necessary
561    apply_custom_handlers(
562        &context.current_element,
563        &text,
564        &context.current_attributes,
565        config,
566    )?;
567
568    Ok(())
569}
570
571/// Processes a CDATA event for the current XML element.
572///
573/// This function handles the processing of CDATA within RSS feeds, ensuring that
574/// CDATA is parsed into the appropriate elements (channels, items, or images).
575///
576/// # Arguments
577///
578/// * `e` - A reference to the `BytesCData` struct representing the CDATA content.
579/// * `context` - A mutable reference to the `ParserContext` struct, which maintains the current parsing state.
580/// * `rss_data` - A mutable reference to the `RssData` struct.
581/// * `config` - Optional configuration for custom parsing behavior.
582fn process_cdata_event(
583    e: &BytesCData<'_>,
584    context: &mut ParserContext,
585    rss_data: &mut RssData,
586    config: Option<&ParserConfig>,
587) -> Result<()> {
588    let text = String::from_utf8_lossy(e.as_ref()).into_owned();
589    let state = context.parsing_state.clone();
590    let parse_context = ParsingContext {
591        is_rss_1_0: matches!(
592            context.rss_version,
593            RssVersionState::Rss1_0
594        ),
595        state,
596        current_element: &context.current_element,
597        text: &text,
598        current_attributes: &context.current_attributes,
599    };
600
601    let mut image_data = ImageData {
602        title: context.image_title.clone(),
603        url: context.image_url.clone(),
604        link: context.image_link.clone(),
605    };
606
607    handle_text_event(
608        rss_data,
609        &parse_context,
610        &mut context.current_item,
611        &mut image_data,
612    )?;
613
614    context.image_title = image_data.title;
615    context.image_url = image_data.url;
616    context.image_link = image_data.link;
617
618    apply_custom_handlers(
619        &context.current_element,
620        &text,
621        &context.current_attributes,
622        config,
623    )?;
624
625    Ok(())
626}
627
628/// Applies custom handlers for RSS elements.
629///
630/// This function checks if any custom handlers are provided in the configuration and applies them to the current element.
631///
632/// # Arguments
633///
634/// * `element` - The current XML element being processed.
635/// * `text` - The text content of the element.
636/// * `attributes` - The attributes of the element.
637/// * `config` - Optional parser configuration containing custom handlers.
638fn apply_custom_handlers(
639    element: &str,
640    text: &str,
641    attributes: &[(String, String)],
642    config: Option<&ParserConfig>,
643) -> Result<()> {
644    if let Some(cfg) = config {
645        for handler in &cfg.custom_handlers {
646            handler.handle_element(element, text, attributes)?;
647        }
648    }
649    Ok(())
650}
651
/// The flavour of RSS feed being parsed, as far as the parser distinguishes it.
///
/// Only RSS 1.0 is singled out; the parser checks for it when deciding
/// whether a feed is treated as RSS 1.0. The `dead_code` allowance keeps the
/// compiler quiet about variants that are not constructed.
#[allow(dead_code)]
enum RssVersionState {
    /// The feed is treated as RSS 1.0.
    Rss1_0,
    /// Any other (or not yet determined) RSS version.
    Other,
}
658
/// Mutable parser state carried across the XML events of one feed.
struct ParserContext {
    /// The RSS flavour the feed is treated as.
    rss_version: RssVersionState,
    /// The section of the feed the parser is currently in.
    parsing_state: ParsingState,
    /// Name of the most recently opened element; cleared on every end tag.
    current_element: String,
    /// Attributes of the most recently opened element; cleared on every end
    /// tag.
    current_attributes: Vec<(String, String)>,
    /// The item currently being assembled.
    current_item: RssItem,
    /// Title collected for the feed image.
    image_title: String,
    /// URL collected for the feed image.
    image_url: String,
    /// Link collected for the feed image.
    image_link: String,
}
670
671impl ParserContext {
672    /// Initialize a new `ParserContext` with default values.
673    pub fn new() -> Self {
674        ParserContext {
675            rss_version: RssVersionState::Other,
676            parsing_state: ParsingState::None,
677            current_element: String::new(),
678            current_attributes: Vec::new(),
679            current_item: RssItem::new(),
680            image_title: String::new(),
681            image_url: String::new(),
682            image_link: String::new(),
683        }
684    }
685}
686
687#[cfg(test)]
688mod tests {
689    use super::*;
690    use std::sync::Arc;
691    use quick_xml::events::BytesText;
692    use quick_xml::events::BytesCData;
693    use quick_xml::events::BytesStart;
694
695    struct MockElementHandler;
696
697    impl ElementHandler for MockElementHandler {
698        fn handle_element(
699            &self,
700            name: &str,
701            text: &str,
702            _attributes: &[(String, String)],
703        ) -> Result<()> {
704            if name == "customElement" && text == "Custom content" {
705                Ok(())
706            } else {
707                Err(RssError::UnknownElement(name.into()))
708            }
709        }
710    }
711
712    #[test]
713    fn test_parser_config_with_custom_handler() {
714        let handler = Arc::new(MockElementHandler);
715        let config = ParserConfig {
716            custom_handlers: vec![handler],
717        };
718
719        assert_eq!(config.custom_handlers.len(), 1);
720        assert!(config.custom_handlers[0]
721            .handle_element("customElement", "Custom content", &[])
722            .is_ok());
723    }
724
725    #[test]
726    fn test_parser_config_no_custom_handlers() {
727        let config = ParserConfig::default();
728        assert!(config.custom_handlers.is_empty());
729    }
730
731    #[test]
732    fn test_process_start_event_empty_name() {
733        let e = BytesStart::new("");
734        let mut context = ParserContext::new();
735        let mut rss_data = RssData::default();
736
737        let result = process_start_event(&e, &mut context, &mut rss_data);
738        assert!(result.is_ok());
739    }
740
741    #[test]
742    fn test_process_start_event_non_empty_name() {
743        let e = BytesStart::new("item");
744        let mut context = ParserContext::new();
745        let mut rss_data = RssData::default();
746
747        let result = process_start_event(&e, &mut context, &mut rss_data);
748        assert!(result.is_ok());
749        assert_eq!(context.current_element, "item");
750    }
751
752    #[test]
753    fn test_process_text_event() {
754        let e = BytesText::from_escaped("Sample Text");
755        let mut context = ParserContext::new();
756        let mut rss_data = RssData::default();
757
758        let result = process_text_event(&e, &mut context, &mut rss_data, None);
759        assert!(result.is_ok());
760    }
761
762    #[test]
763    fn test_process_cdata_event() {
764        let e = BytesCData::new("Sample CDATA");
765        let mut context = ParserContext::new();
766        let mut rss_data = RssData::default();
767
768        let result = process_cdata_event(&e, &mut context, &mut rss_data, None);
769        assert!(result.is_ok());
770    }
771
772    #[test]
773    fn test_parse_channel_rdf_li_rss_1_0() {
774        let mut rss_data = RssData::default();
775        let result = parse_channel_element(&mut rss_data, "rdf:li", "", true);
776        assert!(result.is_ok());
777    }
778
779    #[test]
780    fn test_parse_channel_rdf_li_non_rss_1_0() {
781        let mut rss_data = RssData::default();
782        let result = parse_channel_element(&mut rss_data, "rdf:li", "", false);
783        assert!(result.is_err());
784    }
785
786    #[test]
787    fn test_parse_channel_unknown_element() {
788        let mut rss_data = RssData::default();
789        let result = parse_channel_element(&mut rss_data, "unknownElement", "", false);
790        assert!(result.is_err());
791    }
792
793    #[test]
794    fn test_parse_rss_with_image() {
795        let rss_xml = r#"
796        <?xml version="1.0" encoding="UTF-8"?>
797        <rss version="2.0">
798          <channel>
799            <title>Sample Feed</title>
800            <link>https://example.com</link>
801            <description>A sample RSS feed</description>
802            <image>
803              <title>Sample Image</title>
804              <url>https://example.com/image.jpg</url>
805              <link>https://example.com</link>
806            </image>
807          </channel>
808        </rss>
809        "#;
810
811        let result = parse_rss(rss_xml, None);
812
813        match result {
814            Ok(parsed_data) => {
815                assert_eq!(parsed_data.title, "Sample Feed");
816                assert_eq!(parsed_data.image_title, "Sample Image");
817            }
818            Err(RssError::UnknownElement(element)) => {
819                panic!("Failed due to unknown element: {:?}", element);
820            }
821            Err(e) => panic!("Failed to parse RSS with image: {:?}", e),
822        }
823    }
824
825    #[test]
826    fn test_parse_rss_1_0() {
827        let rss_xml = r#"
828        <?xml version="1.0" encoding="UTF-8"?>
829        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
830                 xmlns="http://purl.org/rss/1.0/">
831          <channel rdf:about="https://example.com">
832            <title>Sample Feed</title>
833            <link>https://example.com</link>
834            <description>A sample RSS feed</description>
835          </channel>
836        </rdf:RDF>
837        "#;
838
839        let result = parse_rss(rss_xml, None);
840
841        match result {
842            Ok(parsed_data) => {
843                assert_eq!(parsed_data.title, "Sample Feed");
844            }
845            Err(RssError::UnknownElement(element)) => {
846                panic!("Failed due to unknown element: {:?}", element);
847            }
848            Err(e) => panic!("Failed to parse RSS 1.0: {:?}", e),
849        }
850    }
851
852    #[test]
853    fn test_parse_rss_2_0() {
854        let rss_xml = r#"
855        <?xml version="1.0" encoding="UTF-8"?>
856        <rss version="2.0">
857          <channel>
858            <title>Sample Feed</title>
859            <link>https://example.com</link>
860            <description>A sample RSS feed</description>
861          </channel>
862        </rss>
863        "#;
864
865        let result = parse_rss(rss_xml, None);
866
867        match result {
868            Ok(parsed_data) => {
869                assert_eq!(parsed_data.title, "Sample Feed");
870            }
871            Err(RssError::UnknownElement(element)) => {
872                panic!("Failed due to unknown element: {:?}", element);
873            }
874            Err(e) => panic!("Failed to parse RSS 2.0: {:?}", e),
875        }
876    }
877
878    #[test]
879    fn test_parse_channel_language() {
880        let mut rss_data = RssData::default();
881        let result = parse_channel_element(
882            &mut rss_data,
883            "language",
884            "en-US",
885            false,
886        );
887        assert!(result.is_ok());
888        assert_eq!(rss_data.language, "en-US");
889    }
890
891    #[test]
892    fn test_parse_channel_copyright() {
893        let mut rss_data = RssData::default();
894        let result = parse_channel_element(
895            &mut rss_data,
896            "copyright",
897            "© 2024",
898            false,
899        );
900        assert!(result.is_ok());
901        assert_eq!(rss_data.copyright, "© 2024");
902    }
903
904    #[test]
905    fn test_parse_channel_managing_editor() {
906        let mut rss_data = RssData::default();
907        let result = parse_channel_element(
908            &mut rss_data,
909            "managingEditor",
910            "editor@example.com",
911            false,
912        );
913        assert!(result.is_ok());
914        assert_eq!(rss_data.managing_editor, "editor@example.com");
915    }
916
917    #[test]
918    fn test_parse_channel_webmaster() {
919        let mut rss_data = RssData::default();
920        let result = parse_channel_element(
921            &mut rss_data,
922            "webMaster",
923            "webmaster@example.com",
924            false,
925        );
926        assert!(result.is_ok());
927        assert_eq!(rss_data.webmaster, "webmaster@example.com");
928    }
929
930    #[test]
931    fn test_parse_channel_pub_date() {
932        let mut rss_data = RssData::default();
933        let result = parse_channel_element(
934            &mut rss_data,
935            "pubDate",
936            "Mon, 10 Oct 2024 04:00:00 GMT",
937            false,
938        );
939        assert!(result.is_ok());
940        assert_eq!(rss_data.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
941    }
942
943    #[test]
944    fn test_parse_channel_last_build_date() {
945        let mut rss_data = RssData::default();
946        let result = parse_channel_element(
947            &mut rss_data,
948            "lastBuildDate",
949            "Mon, 10 Oct 2024 05:00:00 GMT",
950            false,
951        );
952        assert!(result.is_ok());
953        assert_eq!(
954            rss_data.last_build_date,
955            "Mon, 10 Oct 2024 05:00:00 GMT"
956        );
957    }
958
959    #[test]
960    fn test_parse_channel_category() {
961        let mut rss_data = RssData::default();
962        let result = parse_channel_element(
963            &mut rss_data,
964            "category",
965            "Technology",
966            false,
967        );
968        assert!(result.is_ok());
969        assert_eq!(rss_data.category, "Technology");
970    }
971
972    #[test]
973    fn test_parse_channel_generator() {
974        let mut rss_data = RssData::default();
975        let result = parse_channel_element(
976            &mut rss_data,
977            "generator",
978            "RSS Generator v1.0",
979            false,
980        );
981        assert!(result.is_ok());
982        assert_eq!(rss_data.generator, "RSS Generator v1.0");
983    }
984
985    #[test]
986    fn test_parse_channel_docs() {
987        let mut rss_data = RssData::default();
988        let result = parse_channel_element(
989            &mut rss_data,
990            "docs",
991            "https://example.com/rss/docs",
992            false,
993        );
994        assert!(result.is_ok());
995        assert_eq!(rss_data.docs, "https://example.com/rss/docs");
996    }
997
998    #[test]
999    fn test_parse_channel_ttl() {
1000        let mut rss_data = RssData::default();
1001        let result =
1002            parse_channel_element(&mut rss_data, "ttl", "60", false);
1003        assert!(result.is_ok());
1004        assert_eq!(rss_data.ttl, "60");
1005    }
1006
1007    #[test]
1008    fn test_parse_channel_items_rss_1_0() {
1009        let mut rss_data = RssData::default();
1010        let result =
1011            parse_channel_element(&mut rss_data, "items", "", true);
1012        assert!(result.is_ok());
1013    }
1014
1015    #[test]
1016    fn test_parse_channel_items_non_rss_1_0() {
1017        let mut rss_data = RssData::default();
1018        let result =
1019            parse_channel_element(&mut rss_data, "items", "", false);
1020        assert!(result.is_err());
1021    }
1022
1023    #[test]
1024    fn test_parse_channel_rdf_seq_rss_1_0() {
1025        let mut rss_data = RssData::default();
1026        let result =
1027            parse_channel_element(&mut rss_data, "rdf:Seq", "", true);
1028        assert!(result.is_ok());
1029    }
1030
1031    #[test]
1032    fn test_parse_channel_rdf_seq_non_rss_1_0() {
1033        let mut rss_data = RssData::default();
1034        let result =
1035            parse_channel_element(&mut rss_data, "rdf:Seq", "", false);
1036        assert!(result.is_err());
1037    }
1038
1039    #[test]
1040    fn test_parse_item_author() {
1041        let mut item = RssItem::default();
1042        parse_item_element(
1043            &mut item,
1044            "author",
1045            "author@example.com",
1046            &[],
1047        );
1048        assert_eq!(item.author, "author@example.com");
1049    }
1050
1051    #[test]
1052    fn test_parse_item_guid() {
1053        let mut item = RssItem::default();
1054        parse_item_element(&mut item, "guid", "1234-5678", &[]);
1055        assert_eq!(item.guid, "1234-5678");
1056    }
1057
1058    #[test]
1059    fn test_parse_item_pub_date() {
1060        let mut item = RssItem::default();
1061        parse_item_element(
1062            &mut item,
1063            "pubDate",
1064            "Mon, 10 Oct 2024 04:00:00 GMT",
1065            &[],
1066        );
1067        assert_eq!(item.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
1068    }
1069
1070    #[test]
1071    fn test_parse_item_category() {
1072        let mut item = RssItem::default();
1073        parse_item_element(&mut item, "category", "Technology", &[]);
1074        assert_eq!(item.category, Some("Technology".to_string()));
1075    }
1076
1077    #[test]
1078    fn test_parse_item_comments() {
1079        let mut item = RssItem::default();
1080        parse_item_element(
1081            &mut item,
1082            "comments",
1083            "https://example.com/comments",
1084            &[],
1085        );
1086        assert_eq!(
1087            item.comments,
1088            Some("https://example.com/comments".to_string())
1089        );
1090    }
1091
1092    #[test]
1093    fn test_parse_item_enclosure_with_attributes() {
1094        let mut item = RssItem::default();
1095        let attributes = vec![
1096            (
1097                "url".to_string(),
1098                "https://example.com/audio.mp3".to_string(),
1099            ),
1100            ("length".to_string(), "123456".to_string()),
1101            ("type".to_string(), "audio/mpeg".to_string()),
1102        ];
1103        parse_item_element(&mut item, "enclosure", "", &attributes);
1104        assert_eq!(
1105            item.enclosure,
1106            Some("url=\"https://example.com/audio.mp3\" length=\"123456\" type=\"audio/mpeg\"".to_string())
1107        );
1108    }
1109
1110    #[test]
1111    fn test_parse_item_enclosure_without_attributes() {
1112        let mut item = RssItem::default();
1113        parse_item_element(&mut item, "enclosure", "", &[]);
1114        assert_eq!(item.enclosure, None);
1115    }
1116
1117    #[test]
1118    fn test_parse_item_source() {
1119        let mut item = RssItem::default();
1120        parse_item_element(
1121            &mut item,
1122            "source",
1123            "https://example.com",
1124            &[],
1125        );
1126        assert_eq!(
1127            item.source,
1128            Some("https://example.com".to_string())
1129        );
1130    }
1131}