1use quick_xml::events::{
46 BytesCData, BytesEnd, BytesStart, BytesText, Event,
47};
48use quick_xml::Reader;
49use std::borrow::Cow;
50use std::sync::Arc;
51
52pub use crate::data::{RssData, RssItem, RssVersion};
53pub use crate::error::{Result, RssError};
54
55pub trait ElementHandler: Send + Sync {
59 fn handle_element(
83 &self,
84 name: &str,
85 text: &str,
86 attributes: &[(String, String)],
87 ) -> Result<()>;
88}
89
90#[derive(Default)]
95pub struct ParserConfig {
96 pub custom_handlers: Vec<Arc<dyn ElementHandler>>,
101}
102
103fn parse_channel_element(
115 rss_data: &mut RssData,
116 element: &str,
117 text: &str,
118 is_rss_1_0: bool,
119) -> Result<()> {
120 match element {
121 "title" => {
122 rss_data.title = text.to_string();
123 Ok(())
124 }
125 "link" => {
126 rss_data.link = text.to_string();
127 Ok(())
128 }
129 "description" => {
130 rss_data.description = text.to_string();
131 Ok(())
132 }
133 "language" => {
134 rss_data.language = text.to_string();
135 Ok(())
136 }
137 "copyright" => {
138 rss_data.copyright = text.to_string();
139 Ok(())
140 }
141 "managingEditor" => {
142 rss_data.managing_editor = text.to_string();
143 Ok(())
144 }
145 "webMaster" => {
146 rss_data.webmaster = text.to_string();
147 Ok(())
148 }
149 "pubDate" => {
150 rss_data.pub_date = text.to_string();
151 Ok(())
152 }
153 "lastBuildDate" => {
154 rss_data.last_build_date = text.to_string();
155 Ok(())
156 }
157 "category" => {
158 rss_data.category = text.to_string();
159 Ok(())
160 }
161 "generator" => {
162 rss_data.generator = text.to_string();
163 Ok(())
164 }
165 "docs" => {
166 rss_data.docs = text.to_string();
167 Ok(())
168 }
169 "ttl" => {
170 rss_data.ttl = text.to_string();
171 Ok(())
172 }
173 "items" => {
175 if is_rss_1_0 {
176 Ok(())
177 } else {
178 Err(RssError::UnknownElement("items".into()))
179 }
180 }
181 "rdf:Seq" => {
182 if is_rss_1_0 {
183 Ok(())
184 } else {
185 Err(RssError::UnknownElement("rdf:Seq".into()))
186 }
187 }
188 "rdf:li" => {
189 if is_rss_1_0 {
190 Ok(())
191 } else {
192 Err(RssError::UnknownElement("rdf:li".into()))
193 }
194 }
195 _ => Err(RssError::UnknownElement(format!(
196 "Unknown channel element: {element}"
197 ))),
198 }
199}
200
201fn parse_item_element(
213 item: &mut RssItem,
214 element: &str,
215 text: &str,
216 attributes: &[(String, String)],
217) {
218 match element {
219 "title" => {
220 item.title = text.to_string();
221 }
222 "link" => {
223 item.link = text.to_string();
224 }
225 "description" => {
226 item.description = text.to_string();
227 }
228 "author" => {
229 item.author = text.to_string();
230 }
231 "guid" => {
232 item.guid = text.to_string();
233 }
234 "pubDate" => {
235 item.pub_date = text.to_string();
236 }
237 "category" => {
238 item.category = Some(text.to_string());
239 }
240 "comments" => {
241 item.comments = Some(text.to_string());
242 }
243 "enclosure" => {
244 if attributes.is_empty() {
245 item.enclosure = None;
246 } else {
247 let enclosure_str = attributes
248 .iter()
249 .map(|(k, v)| format!("{k}=\"{v}\""))
250 .collect::<Vec<String>>()
251 .join(" ");
252 item.enclosure = Some(enclosure_str);
253 }
254 }
255 "source" => {
256 item.source = Some(text.to_string());
257 }
258 _ => (), }
260}
261
/// Section of the feed the parser is currently inside.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ParsingState {
    /// Inside a `channel` element.
    Channel,
    /// Inside an `item` element.
    Item,
    /// Inside an `image` element.
    Image,
    /// Not inside any of the tracked sections.
    None,
}
270
271struct ParsingContext<'a> {
273 is_rss_1_0: bool,
274 state: ParsingState,
275 current_element: &'a str,
276 text: &'a str,
277 current_attributes: &'a [(String, String)],
278}
279
280impl ParsingContext<'_> {
281 pub(crate) fn in_channel(&self) -> bool {
283 matches!(self.state, ParsingState::Channel)
284 }
285
286 pub(crate) fn in_item(&self) -> bool {
288 matches!(self.state, ParsingState::Item)
289 }
290
291 pub(crate) fn in_image(&self) -> bool {
293 matches!(self.state, ParsingState::Image)
294 }
295}
296
/// Scratch copy of the channel image fields, filled in while the
/// parser is inside an `image` element.
struct ImageData {
    /// Content of the image's `title` element.
    title: String,
    /// Content of the image's `url` element.
    url: String,
    /// Content of the image's `link` element.
    link: String,
}
303
304fn handle_text_event(
320 rss_data: &mut RssData,
321 context: &ParsingContext,
322 current_item: &mut RssItem,
323 image_data: &mut ImageData,
324) -> Result<()> {
325 if context.in_channel() && !context.in_item() && !context.in_image()
326 {
327 if !context.current_element.is_empty() {
328 parse_channel_element(
329 rss_data,
330 context.current_element,
331 &Cow::Owned(context.text.to_string()),
332 context.is_rss_1_0,
333 )?;
334 }
335 } else if context.in_item() && !context.current_element.is_empty() {
336 parse_item_element(
337 current_item,
338 context.current_element,
339 context.text,
340 context.current_attributes,
341 );
342 } else if context.in_image() && !context.current_element.is_empty()
343 {
344 match context.current_element {
345 "title" => image_data.title = context.text.to_string(),
346 "url" => image_data.url = context.text.to_string(),
347 "link" => image_data.link = context.text.to_string(),
348 _ => (),
349 }
350 }
351 Ok(())
352}
353
354pub fn parse_rss(
377 xml_content: &str,
378 config: Option<&ParserConfig>,
379) -> Result<RssData> {
380 let mut reader = Reader::from_str(xml_content);
381 let mut rss_data = RssData::new(None);
382 let mut buf = Vec::with_capacity(1024);
383 let mut context = ParserContext::new();
384
385 loop {
386 match reader.read_event_into(&mut buf) {
387 Ok(Event::Start(ref e)) => {
388 process_start_event(e, &mut context, &mut rss_data)?;
389 }
390 Ok(Event::End(ref e)) => {
391 process_end_event(e, &mut context, &mut rss_data);
392 }
393 Ok(Event::Text(ref e)) => process_text_event(
394 e,
395 &mut context,
396 &mut rss_data,
397 config,
398 )?,
399 Ok(Event::CData(ref e)) => process_cdata_event(
400 e,
401 &mut context,
402 &mut rss_data,
403 config,
404 )?,
405 Ok(Event::Eof) => break Ok(rss_data),
406 Err(e) => return Err(RssError::XmlParseError(e)),
407 _ => (),
408 }
409 buf.clear();
410 }
411}
412
413fn process_start_event(
425 e: &BytesStart<'_>,
426 context: &mut ParserContext,
427 _rss_data: &mut RssData,
428) -> Result<()> {
429 let name_str = String::from_utf8_lossy(e.name().0).into_owned();
430 if name_str.is_empty() {
431 return Ok(());
432 }
433
434 match name_str.as_str() {
436 "rss" | "rdf:RDF" => {
437 return Ok(());
439 }
440 "channel" => {
441 context.parsing_state = ParsingState::Channel;
443 return Ok(());
444 }
445 "item" => {
446 context.parsing_state = ParsingState::Item;
447 context.current_item = RssItem::new();
448 }
449 "image" => {
450 context.parsing_state = ParsingState::Image;
451 }
452 _ => {
453 if !matches!(
455 context.parsing_state,
456 ParsingState::Item
457 | ParsingState::Channel
458 | ParsingState::Image
459 ) {
460 return Err(RssError::UnknownElement(format!(
461 "Unknown element: {name_str}"
462 )));
463 }
464 }
465 }
466
467 context.current_element = name_str;
469 context.current_attributes = e
470 .attributes()
471 .filter_map(std::result::Result::ok)
472 .map(|a| {
473 (
474 String::from_utf8_lossy(a.key.0).into_owned(),
475 String::from_utf8_lossy(&a.value).into_owned(),
476 )
477 })
478 .collect();
479
480 Ok(())
481}
482
483fn process_end_event(
494 e: &BytesEnd<'_>,
495 context: &mut ParserContext,
496 rss_data: &mut RssData,
497) {
498 let name = e.name().0.to_vec();
499 if name == b"channel" {
500 if matches!(context.parsing_state, ParsingState::Channel) {
501 context.parsing_state = ParsingState::None;
502 }
503 } else if name == b"item" {
504 if matches!(context.parsing_state, ParsingState::Item) {
505 context.parsing_state = ParsingState::None;
506 rss_data.add_item(context.current_item.clone());
507 }
508 } else if name == b"image"
509 && matches!(context.parsing_state, ParsingState::Image)
510 {
511 context.parsing_state = ParsingState::None;
512 rss_data.set_image(
513 &context.image_title.clone(),
514 &context.image_url.clone(),
515 &context.image_link.clone(),
516 );
517 }
518 context.current_element.clear();
519 context.current_attributes.clear();
520}
521
522fn process_text_event(
523 e: &BytesText<'_>,
524 context: &mut ParserContext,
525 rss_data: &mut RssData,
526 config: Option<&ParserConfig>,
527) -> Result<()> {
528 let decoded = e
529 .decode()
530 .map_err(|err| RssError::Custom(err.to_string()))?;
531 let text = quick_xml::escape::unescape(&decoded)
532 .map_err(|err| RssError::Custom(err.to_string()))?
533 .into_owned();
534
535 let parse_context = ParsingContext {
536 is_rss_1_0: matches!(
537 context.rss_version,
538 RssVersionState::Rss1_0
539 ),
540 state: context.parsing_state.clone(),
541 current_element: &context.current_element,
542 text: &text,
543 current_attributes: &context.current_attributes,
544 };
545
546 let mut image_data = ImageData {
547 title: context.image_title.clone(),
548 url: context.image_url.clone(),
549 link: context.image_link.clone(),
550 };
551
552 handle_text_event(
553 rss_data,
554 &parse_context,
555 &mut context.current_item,
556 &mut image_data,
557 )?;
558
559 context.image_title = image_data.title;
560 context.image_url = image_data.url;
561 context.image_link = image_data.link;
562
563 apply_custom_handlers(
565 &context.current_element,
566 &text,
567 &context.current_attributes,
568 config,
569 )?;
570
571 Ok(())
572}
573
574fn process_cdata_event(
586 e: &BytesCData<'_>,
587 context: &mut ParserContext,
588 rss_data: &mut RssData,
589 config: Option<&ParserConfig>,
590) -> Result<()> {
591 let text = String::from_utf8_lossy(e.as_ref()).into_owned();
592 let state = context.parsing_state.clone();
593 let parse_context = ParsingContext {
594 is_rss_1_0: matches!(
595 context.rss_version,
596 RssVersionState::Rss1_0
597 ),
598 state,
599 current_element: &context.current_element,
600 text: &text,
601 current_attributes: &context.current_attributes,
602 };
603
604 let mut image_data = ImageData {
605 title: context.image_title.clone(),
606 url: context.image_url.clone(),
607 link: context.image_link.clone(),
608 };
609
610 handle_text_event(
611 rss_data,
612 &parse_context,
613 &mut context.current_item,
614 &mut image_data,
615 )?;
616
617 context.image_title = image_data.title;
618 context.image_url = image_data.url;
619 context.image_link = image_data.link;
620
621 apply_custom_handlers(
622 &context.current_element,
623 &text,
624 &context.current_attributes,
625 config,
626 )?;
627
628 Ok(())
629}
630
631fn apply_custom_handlers(
642 element: &str,
643 text: &str,
644 attributes: &[(String, String)],
645 config: Option<&ParserConfig>,
646) -> Result<()> {
647 if let Some(cfg) = config {
648 for handler in &cfg.custom_handlers {
649 handler.handle_element(element, text, attributes)?;
650 }
651 }
652 Ok(())
653}
654
/// Version family of the document being parsed, as far as the parser
/// needs to distinguish it.
// `dead_code` is allowed because not every variant is necessarily
// constructed by the parser.
#[allow(dead_code)]
enum RssVersionState {
    /// RSS 1.0: the RSS 1.0 container elements are accepted in the
    /// channel.
    Rss1_0,
    /// Any other version.
    Other,
}
661
662struct ParserContext {
664 rss_version: RssVersionState,
665 parsing_state: ParsingState,
666 current_element: String,
667 current_attributes: Vec<(String, String)>,
668 current_item: RssItem,
669 image_title: String,
670 image_url: String,
671 image_link: String,
672}
673
674impl ParserContext {
675 pub(crate) fn new() -> Self {
677 ParserContext {
678 rss_version: RssVersionState::Other,
679 parsing_state: ParsingState::None,
680 current_element: String::new(),
681 current_attributes: Vec::new(),
682 current_item: RssItem::new(),
683 image_title: String::new(),
684 image_url: String::new(),
685 image_link: String::new(),
686 }
687 }
688}
689
#[cfg(test)]
mod tests {
    //! Unit tests for the element parsers, the event handlers and
    //! end-to-end parsing through `parse_rss`.

    use super::*;
    use quick_xml::events::BytesCData;
    use quick_xml::events::BytesStart;
    use quick_xml::events::BytesText;
    use std::sync::Arc;

    /// Handler that accepts exactly one element/content pair and
    /// rejects everything else.
    struct MockElementHandler;

    impl ElementHandler for MockElementHandler {
        fn handle_element(
            &self,
            name: &str,
            text: &str,
            _attributes: &[(String, String)],
        ) -> Result<()> {
            if name == "customElement" && text == "Custom content" {
                Ok(())
            } else {
                Err(RssError::UnknownElement(name.into()))
            }
        }
    }

    // --- ParserConfig -------------------------------------------------

    #[test]
    fn test_parser_config_with_custom_handler() {
        let handler = Arc::new(MockElementHandler);
        let config = ParserConfig {
            custom_handlers: vec![handler],
        };

        assert_eq!(config.custom_handlers.len(), 1);
        assert!(config.custom_handlers[0]
            .handle_element("customElement", "Custom content", &[])
            .is_ok());
    }

    #[test]
    fn test_parser_config_no_custom_handlers() {
        let config = ParserConfig::default();
        assert!(config.custom_handlers.is_empty());
    }

    // --- Event handlers on a fresh context ----------------------------

    #[test]
    fn test_process_start_event_empty_name() {
        let e = BytesStart::new("");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_ok());
    }

    #[test]
    fn test_process_start_event_non_empty_name() {
        let e = BytesStart::new("item");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_ok());
        assert_eq!(context.current_element, "item");
    }

    #[test]
    fn test_process_text_event() {
        let e = BytesText::from_escaped("Sample Text");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
    }

    #[test]
    fn test_process_cdata_event() {
        let e = BytesCData::new("Sample CDATA");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
    }

    // --- Channel element parsing --------------------------------------

    #[test]
    fn test_parse_channel_rdf_li_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "rdf:li", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_rdf_li_non_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "rdf:li", "", false);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_channel_unknown_element() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "unknownElement",
            "",
            false,
        );
        assert!(result.is_err());
    }

    // --- End-to-end parsing -------------------------------------------

    #[test]
    fn test_parse_rss_with_image() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Sample Feed</title>
                <link>https://example.com</link>
                <description>A sample RSS feed</description>
                <image>
                  <title>Sample Image</title>
                  <url>https://example.com/image.jpg</url>
                  <link>https://example.com</link>
                </image>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
                assert_eq!(parsed_data.image_title, "Sample Image");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS with image: {e:?}"),
        }
    }

    #[test]
    fn test_parse_rss_1_0() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
                     xmlns="http://purl.org/rss/1.0/">
              <channel rdf:about="https://example.com">
                <title>Sample Feed</title>
                <link>https://example.com</link>
                <description>A sample RSS feed</description>
              </channel>
            </rdf:RDF>
            "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS 1.0: {e:?}"),
        }
    }

    #[test]
    fn test_parse_rss_2_0() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Sample Feed</title>
                <link>https://example.com</link>
                <description>A sample RSS feed</description>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS 2.0: {e:?}"),
        }
    }

    // --- Individual channel fields ------------------------------------

    #[test]
    fn test_parse_channel_language() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "language",
            "en-US",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.language, "en-US");
    }

    #[test]
    fn test_parse_channel_copyright() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "copyright",
            "© 2024",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.copyright, "© 2024");
    }

    #[test]
    fn test_parse_channel_managing_editor() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "managingEditor",
            "editor@example.com",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.managing_editor, "editor@example.com");
    }

    #[test]
    fn test_parse_channel_webmaster() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "webMaster",
            "webmaster@example.com",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.webmaster, "webmaster@example.com");
    }

    #[test]
    fn test_parse_channel_pub_date() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "pubDate",
            "Mon, 10 Oct 2024 04:00:00 GMT",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
    }

    #[test]
    fn test_parse_channel_last_build_date() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "lastBuildDate",
            "Mon, 10 Oct 2024 05:00:00 GMT",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(
            rss_data.last_build_date,
            "Mon, 10 Oct 2024 05:00:00 GMT"
        );
    }

    #[test]
    fn test_parse_channel_category() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "category",
            "Technology",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.category, "Technology");
    }

    #[test]
    fn test_parse_channel_generator() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "generator",
            "RSS Generator v1.0",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.generator, "RSS Generator v1.0");
    }

    #[test]
    fn test_parse_channel_docs() {
        let mut rss_data = RssData::default();
        let result = parse_channel_element(
            &mut rss_data,
            "docs",
            "https://example.com/rss/docs",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.docs, "https://example.com/rss/docs");
    }

    #[test]
    fn test_parse_channel_ttl() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "ttl", "60", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.ttl, "60");
    }

    #[test]
    fn test_parse_channel_items_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "items", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_items_non_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "items", "", false);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_channel_rdf_seq_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "rdf:Seq", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_rdf_seq_non_rss_1_0() {
        let mut rss_data = RssData::default();
        let result =
            parse_channel_element(&mut rss_data, "rdf:Seq", "", false);
        assert!(result.is_err());
    }

    // --- Individual item fields ---------------------------------------

    #[test]
    fn test_parse_item_author() {
        let mut item = RssItem::default();
        parse_item_element(
            &mut item,
            "author",
            "author@example.com",
            &[],
        );
        assert_eq!(item.author, "author@example.com");
    }

    #[test]
    fn test_parse_item_guid() {
        let mut item = RssItem::default();
        parse_item_element(&mut item, "guid", "1234-5678", &[]);
        assert_eq!(item.guid, "1234-5678");
    }

    #[test]
    fn test_parse_item_pub_date() {
        let mut item = RssItem::default();
        parse_item_element(
            &mut item,
            "pubDate",
            "Mon, 10 Oct 2024 04:00:00 GMT",
            &[],
        );
        assert_eq!(item.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
    }

    #[test]
    fn test_parse_item_category() {
        let mut item = RssItem::default();
        parse_item_element(&mut item, "category", "Technology", &[]);
        assert_eq!(item.category, Some("Technology".to_string()));
    }

    #[test]
    fn test_parse_item_comments() {
        let mut item = RssItem::default();
        parse_item_element(
            &mut item,
            "comments",
            "https://example.com/comments",
            &[],
        );
        assert_eq!(
            item.comments,
            Some("https://example.com/comments".to_string())
        );
    }

    #[test]
    fn test_parse_item_enclosure_with_attributes() {
        let mut item = RssItem::default();
        let attributes = vec![
            (
                "url".to_string(),
                "https://example.com/audio.mp3".to_string(),
            ),
            ("length".to_string(), "123456".to_string()),
            ("type".to_string(), "audio/mpeg".to_string()),
        ];
        parse_item_element(&mut item, "enclosure", "", &attributes);
        assert_eq!(
            item.enclosure,
            Some("url=\"https://example.com/audio.mp3\" length=\"123456\" type=\"audio/mpeg\"".to_string())
        );
    }

    #[test]
    fn test_parse_item_enclosure_without_attributes() {
        let mut item = RssItem::default();
        parse_item_element(&mut item, "enclosure", "", &[]);
        assert_eq!(item.enclosure, None);
    }

    #[test]
    fn test_parse_item_source() {
        let mut item = RssItem::default();
        parse_item_element(
            &mut item,
            "source",
            "https://example.com",
            &[],
        );
        assert_eq!(
            item.source,
            Some("https://example.com".to_string())
        );
    }

    // --- Event handlers with a prepared context -----------------------

    #[test]
    fn test_process_text_event_in_channel() {
        let e = BytesText::from_escaped("Channel Title");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::Channel;
        context.current_element = "title".to_string();
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(rss_data.title, "Channel Title");
    }

    #[test]
    fn test_process_text_event_in_item() {
        let e = BytesText::from_escaped("Item Title");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::Item;
        context.current_element = "title".to_string();
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(context.current_item.title, "Item Title");
    }

    #[test]
    fn test_process_cdata_event_in_channel() {
        let e = BytesCData::new("CDATA Description");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::Channel;
        context.current_element = "description".to_string();
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(rss_data.description, "CDATA Description");
    }

    #[test]
    fn test_process_cdata_event_in_item() {
        let e = BytesCData::new("CDATA Item Desc");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::Item;
        context.current_element = "description".to_string();
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(context.current_item.description, "CDATA Item Desc");
    }

    #[test]
    fn test_process_text_event_with_custom_handler() {
        let handler = Arc::new(MockElementHandler);
        let config = ParserConfig {
            custom_handlers: vec![handler],
        };

        let e = BytesText::from_escaped("Custom content");
        let mut context = ParserContext::new();
        // The state stays `None`, so only the custom handler sees the
        // text.
        context.current_element = "customElement".to_string();
        let mut rss_data = RssData::default();

        let result = process_text_event(
            &e,
            &mut context,
            &mut rss_data,
            Some(&config),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_rss_with_cdata() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>CDATA Feed</title>
                <link>https://example.com</link>
                <description><![CDATA[A feed with <b>CDATA</b> content]]></description>
                <item>
                  <title><![CDATA[CDATA Item]]></title>
                  <link>https://example.com/item1</link>
                  <description><![CDATA[Item with <em>HTML</em>]]></description>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "CDATA Feed");
        assert!(data.description.contains("CDATA"));
        assert_eq!(data.items.len(), 1);
        assert_eq!(data.items[0].title, "CDATA Item");
    }

    #[test]
    fn test_process_text_event_with_escaped_entities() {
        // `from_escaped` expects already-escaped input, so the special
        // characters must be written as entities.
        let e = BytesText::from_escaped("&amp; &lt; &gt;");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::Channel;
        context.current_element = "title".to_string();
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        // The stored value is the unescaped text.
        assert_eq!(rss_data.title, "& < >");
    }

    #[test]
    fn test_process_start_event_unknown_element_outside_context() {
        let e = BytesStart::new("unknownRoot");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::None;
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_rss_with_all_channel_fields() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Full Channel</title>
                <link>https://example.com</link>
                <description>A complete channel</description>
                <language>en-US</language>
                <copyright>2024</copyright>
                <managingEditor>editor@example.com</managingEditor>
                <webMaster>webmaster@example.com</webMaster>
                <pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
                <lastBuildDate>Mon, 01 Jan 2024 00:00:00 GMT</lastBuildDate>
                <category>Technology</category>
                <generator>Test Generator</generator>
                <docs>https://example.com/docs</docs>
                <ttl>60</ttl>
                <item>
                  <title>Item 1</title>
                  <link>https://example.com/item1</link>
                  <description>First item</description>
                  <author>author@example.com</author>
                  <category>Cat1</category>
                  <comments>https://example.com/item1/comments</comments>
                  <source>https://example.com</source>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "Full Channel");
        assert_eq!(data.language, "en-US");
        assert_eq!(data.copyright, "2024");
        assert_eq!(data.managing_editor, "editor@example.com");
        assert_eq!(data.webmaster, "webmaster@example.com");
        assert_eq!(data.category, "Technology");
        assert_eq!(data.generator, "Test Generator");
        assert_eq!(data.docs, "https://example.com/docs");
        assert_eq!(data.ttl, "60");
        assert_eq!(data.items.len(), 1);
        assert_eq!(data.items[0].author, "author@example.com");
        assert_eq!(data.items[0].category, Some("Cat1".to_string()));
        assert_eq!(
            data.items[0].comments,
            Some("https://example.com/item1/comments".to_string())
        );
        assert_eq!(
            data.items[0].source,
            Some("https://example.com".to_string())
        );
    }

    #[test]
    fn test_parse_rss_malformed_xml() {
        let xml = "<rss><channel><title>Test</unclosed";
        let result = parse_rss(xml, None);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_rss_with_cdata_in_image() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Test Feed</title>
                <link>https://example.com</link>
                <description>Test</description>
                <image>
                  <title><![CDATA[Image Title]]></title>
                  <url><![CDATA[https://example.com/image.png]]></url>
                  <link><![CDATA[https://example.com]]></link>
                </image>
                <item>
                  <title>Item 1</title>
                  <link>https://example.com/1</link>
                  <description>Desc</description>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.image_title, "Image Title");
        assert_eq!(data.image_url, "https://example.com/image.png");
        assert_eq!(data.image_link, "https://example.com");
    }

    #[test]
    fn test_parse_rss_with_cdata_in_item() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Test Feed</title>
                <link>https://example.com</link>
                <description>Test</description>
                <item>
                  <title><![CDATA[CDATA Item Title]]></title>
                  <link>https://example.com/1</link>
                  <description><![CDATA[<p>HTML content</p>]]></description>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.items[0].title, "CDATA Item Title");
        assert!(data.items[0].description.contains("HTML content"));
    }

    #[test]
    fn test_process_text_event_with_failing_custom_handler() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Test Feed</title>
                <link>https://example.com</link>
                <description>Test</description>
                <item>
                  <title>Item</title>
                  <link>https://example.com/1</link>
                  <description>Desc</description>
                  <unknownField>value</unknownField>
                </item>
              </channel>
            </rss>
            "#;

        let handler = Arc::new(MockElementHandler);
        let config = ParserConfig {
            custom_handlers: vec![handler],
        };

        // The mock handler rejects everything except its one accepted
        // pair, so parsing must fail.
        let result = parse_rss(rss_xml, Some(&config));
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_element_with_attributes() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title>Test Feed</title>
                <link>https://example.com</link>
                <description>Test</description>
                <item>
                  <title>Item</title>
                  <link href="https://example.com/1">https://example.com/1</link>
                  <description>Desc</description>
                  <enclosure url="https://example.com/audio.mp3" length="12345" type="audio/mpeg"/>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cdata_event_channel_elements() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
              <channel>
                <title><![CDATA[CDATA Channel Title]]></title>
                <link>https://example.com</link>
                <description><![CDATA[CDATA Description]]></description>
                <item>
                  <title>Item</title>
                  <link>https://example.com/1</link>
                  <description>Desc</description>
                </item>
              </channel>
            </rss>
            "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "CDATA Channel Title");
        assert_eq!(data.description, "CDATA Description");
    }
}
1472
1473impl std::fmt::Debug for ParserConfig {
1474 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1475 f.debug_struct("ParserConfig")
1476 .field(
1477 "custom_handlers",
1478 &format!("[{} handlers]", self.custom_handlers.len()),
1479 )
1480 .finish()
1481 }
1482}