1use quick_xml::events::{
46 BytesCData, BytesEnd, BytesStart, BytesText, Event,
47};
48use quick_xml::Reader;
49use std::borrow::Cow;
50use std::sync::Arc;
51
52pub use crate::data::{RssData, RssItem, RssVersion};
53pub use crate::error::{Result, RssError};
54
55pub trait ElementHandler: Send + Sync {
59 fn handle_element(
83 &self,
84 name: &str,
85 text: &str,
86 attributes: &[(String, String)],
87 ) -> Result<()>;
88}
89
90#[derive(Default)]
95pub struct ParserConfig {
96 pub custom_handlers: Vec<Arc<dyn ElementHandler>>,
101}
102
103fn parse_channel_element(
115 rss_data: &mut RssData,
116 element: &str,
117 text: &str,
118 is_rss_1_0: bool,
119) -> Result<()> {
120 match element {
121 "title" => {
122 rss_data.title = text.to_string();
123 Ok(())
124 }
125 "link" => {
126 rss_data.link = text.to_string();
127 Ok(())
128 }
129 "description" => {
130 rss_data.description = text.to_string();
131 Ok(())
132 }
133 "language" => {
134 rss_data.language = text.to_string();
135 Ok(())
136 }
137 "copyright" => {
138 rss_data.copyright = text.to_string();
139 Ok(())
140 }
141 "managingEditor" => {
142 rss_data.managing_editor = text.to_string();
143 Ok(())
144 }
145 "webMaster" => {
146 rss_data.webmaster = text.to_string();
147 Ok(())
148 }
149 "pubDate" => {
150 rss_data.pub_date = text.to_string();
151 Ok(())
152 }
153 "lastBuildDate" => {
154 rss_data.last_build_date = text.to_string();
155 Ok(())
156 }
157 "category" => {
158 rss_data.category = text.to_string();
159 Ok(())
160 }
161 "generator" => {
162 rss_data.generator = text.to_string();
163 Ok(())
164 }
165 "docs" => {
166 rss_data.docs = text.to_string();
167 Ok(())
168 }
169 "ttl" => {
170 rss_data.ttl = text.to_string();
171 Ok(())
172 }
173 "items" => {
175 if is_rss_1_0 {
176 Ok(())
177 } else {
178 Err(RssError::UnknownElement("items".into()))
179 }
180 }
181 "rdf:Seq" => {
182 if is_rss_1_0 {
183 Ok(())
184 } else {
185 Err(RssError::UnknownElement("rdf:Seq".into()))
186 }
187 }
188 "rdf:li" => {
189 if is_rss_1_0 {
190 Ok(())
191 } else {
192 Err(RssError::UnknownElement("rdf:li".into()))
193 }
194 }
195 _ => Err(RssError::UnknownElement(format!(
196 "Unknown channel element: {element}"
197 ))),
198 }
199}
200
201fn parse_item_element(
213 item: &mut RssItem,
214 element: &str,
215 text: &str,
216 attributes: &[(String, String)],
217) {
218 match element {
219 "title" => {
220 item.title = text.to_string();
221 }
222 "link" => {
223 item.link = text.to_string();
224 }
225 "description" => {
226 item.description = text.to_string();
227 }
228 "author" => {
229 item.author = text.to_string();
230 }
231 "guid" => {
232 item.guid = text.to_string();
233 }
234 "pubDate" => {
235 item.pub_date = text.to_string();
236 }
237 "category" => {
238 item.category = Some(text.to_string());
239 }
240 "comments" => {
241 item.comments = Some(text.to_string());
242 }
243 "enclosure" => {
244 if attributes.is_empty() {
245 item.enclosure = None;
246 } else {
247 let enclosure_str = attributes
248 .iter()
249 .map(|(k, v)| format!("{k}=\"{v}\""))
250 .collect::<Vec<String>>()
251 .join(" ");
252 item.enclosure = Some(enclosure_str);
253 }
254 }
255 "source" => {
256 item.source = Some(text.to_string());
257 }
258 _ => (), }
260}
261
/// Which container element the parser is currently inside.
#[derive(Clone)]
enum ParsingState {
    /// Inside `<channel>`.
    Channel,
    /// Inside `<item>`.
    Item,
    /// Inside `<image>`.
    Image,
    /// Not inside any of the containers above.
    None,
}
270
271struct ParsingContext<'a> {
273 is_rss_1_0: bool,
274 state: ParsingState,
275 current_element: &'a str,
276 text: &'a str,
277 current_attributes: &'a [(String, String)],
278}
279
280impl ParsingContext<'_> {
281 pub(crate) fn in_channel(&self) -> bool {
283 matches!(self.state, ParsingState::Channel)
284 }
285
286 pub(crate) fn in_item(&self) -> bool {
288 matches!(self.state, ParsingState::Item)
289 }
290
291 pub(crate) fn in_image(&self) -> bool {
293 matches!(self.state, ParsingState::Image)
294 }
295}
296
/// Text collected from the children of an `<image>` element.
struct ImageData {
    /// Content of the image's `<title>`.
    title: String,
    /// Content of the image's `<url>`.
    url: String,
    /// Content of the image's `<link>`.
    link: String,
}
303
304fn handle_text_event(
320 rss_data: &mut RssData,
321 context: &ParsingContext,
322 current_item: &mut RssItem,
323 image_data: &mut ImageData,
324) -> Result<()> {
325 if context.in_channel() && !context.in_item() && !context.in_image()
326 {
327 if !context.current_element.is_empty() {
328 parse_channel_element(
329 rss_data,
330 context.current_element,
331 &Cow::Owned(context.text.to_string()),
332 context.is_rss_1_0,
333 )?;
334 }
335 } else if context.in_item() && !context.current_element.is_empty() {
336 parse_item_element(
337 current_item,
338 context.current_element,
339 context.text,
340 context.current_attributes,
341 );
342 } else if context.in_image() && !context.current_element.is_empty()
343 {
344 match context.current_element {
345 "title" => image_data.title = context.text.to_string(),
346 "url" => image_data.url = context.text.to_string(),
347 "link" => image_data.link = context.text.to_string(),
348 _ => (),
349 }
350 }
351 Ok(())
352}
353
354pub fn parse_rss(
377 xml_content: &str,
378 config: Option<&ParserConfig>,
379) -> Result<RssData> {
380 let mut reader = Reader::from_str(xml_content);
381 let mut rss_data = RssData::new(None);
382 let mut buf = Vec::with_capacity(1024);
383 let mut context = ParserContext::new();
384
385 loop {
386 match reader.read_event_into(&mut buf) {
387 Ok(Event::Start(ref e)) => {
388 process_start_event(e, &mut context, &mut rss_data)?;
389 }
390 Ok(Event::End(ref e)) => {
391 process_end_event(e, &mut context, &mut rss_data);
392 }
393 Ok(Event::Text(ref e)) => process_text_event(
394 e,
395 &mut context,
396 &mut rss_data,
397 config,
398 )?,
399 Ok(Event::CData(ref e)) => process_cdata_event(
400 e,
401 &mut context,
402 &mut rss_data,
403 config,
404 )?,
405 Ok(Event::Eof) => break Ok(rss_data),
406 Err(e) => return Err(RssError::XmlParseError(e)),
407 _ => (),
408 }
409 buf.clear();
410 }
411}
412
413fn process_start_event(
425 e: &BytesStart<'_>,
426 context: &mut ParserContext,
427 _rss_data: &mut RssData,
428) -> Result<()> {
429 let name_str = String::from_utf8_lossy(e.name().0).into_owned();
430 if name_str.is_empty() {
431 return Ok(());
432 }
433
434 match name_str.as_str() {
436 "rss" | "rdf:RDF" => {
437 return Ok(());
439 }
440 "channel" => {
441 context.parsing_state = ParsingState::Channel;
443 return Ok(());
444 }
445 "item" => {
446 context.parsing_state = ParsingState::Item;
447 context.current_item = RssItem::new();
448 }
449 "image" => {
450 context.parsing_state = ParsingState::Image;
451 }
452 _ => {
453 if !matches!(
455 context.parsing_state,
456 ParsingState::Item
457 | ParsingState::Channel
458 | ParsingState::Image
459 ) {
460 return Err(RssError::UnknownElement(format!(
461 "Unknown element: {name_str}"
462 )));
463 }
464 }
465 }
466
467 context.current_element = name_str;
469 context.current_attributes = e
470 .attributes()
471 .filter_map(std::result::Result::ok)
472 .map(|a| {
473 (
474 String::from_utf8_lossy(a.key.0).into_owned(),
475 String::from_utf8_lossy(&a.value).into_owned(),
476 )
477 })
478 .collect();
479
480 Ok(())
481}
482
483fn process_end_event(
494 e: &BytesEnd<'_>,
495 context: &mut ParserContext,
496 rss_data: &mut RssData,
497) {
498 let name = e.name().0.to_vec();
499 if name == b"channel" {
500 if matches!(context.parsing_state, ParsingState::Channel) {
501 context.parsing_state = ParsingState::None;
502 }
503 } else if name == b"item" {
504 if matches!(context.parsing_state, ParsingState::Item) {
505 context.parsing_state = ParsingState::None;
506 rss_data.add_item(context.current_item.clone());
507 }
508 } else if name == b"image"
509 && matches!(context.parsing_state, ParsingState::Image)
510 {
511 context.parsing_state = ParsingState::None;
512 rss_data.set_image(
513 &context.image_title.clone(),
514 &context.image_url.clone(),
515 &context.image_link.clone(),
516 );
517 }
518 context.current_element.clear();
519 context.current_attributes.clear();
520}
521
522fn process_text_event(
523 e: &BytesText<'_>,
524 context: &mut ParserContext,
525 rss_data: &mut RssData,
526 config: Option<&ParserConfig>,
527) -> Result<()> {
528 let decoded = e
529 .decode()
530 .map_err(|err| RssError::Custom(err.to_string()))?;
531 let text = quick_xml::escape::unescape(&decoded)
532 .map_err(|err| RssError::Custom(err.to_string()))?
533 .into_owned();
534
535 let parse_context = ParsingContext {
536 is_rss_1_0: matches!(
537 context.rss_version,
538 RssVersionState::Rss1_0
539 ),
540 state: context.parsing_state.clone(),
541 current_element: &context.current_element,
542 text: &text,
543 current_attributes: &context.current_attributes,
544 };
545
546 let mut image_data = ImageData {
547 title: context.image_title.clone(),
548 url: context.image_url.clone(),
549 link: context.image_link.clone(),
550 };
551
552 handle_text_event(
553 rss_data,
554 &parse_context,
555 &mut context.current_item,
556 &mut image_data,
557 )?;
558
559 context.image_title = image_data.title;
560 context.image_url = image_data.url;
561 context.image_link = image_data.link;
562
563 apply_custom_handlers(
565 &context.current_element,
566 &text,
567 &context.current_attributes,
568 config,
569 )?;
570
571 Ok(())
572}
573
574fn process_cdata_event(
586 e: &BytesCData<'_>,
587 context: &mut ParserContext,
588 rss_data: &mut RssData,
589 config: Option<&ParserConfig>,
590) -> Result<()> {
591 let text = String::from_utf8_lossy(e.as_ref()).into_owned();
592 let state = context.parsing_state.clone();
593 let parse_context = ParsingContext {
594 is_rss_1_0: matches!(
595 context.rss_version,
596 RssVersionState::Rss1_0
597 ),
598 state,
599 current_element: &context.current_element,
600 text: &text,
601 current_attributes: &context.current_attributes,
602 };
603
604 let mut image_data = ImageData {
605 title: context.image_title.clone(),
606 url: context.image_url.clone(),
607 link: context.image_link.clone(),
608 };
609
610 handle_text_event(
611 rss_data,
612 &parse_context,
613 &mut context.current_item,
614 &mut image_data,
615 )?;
616
617 context.image_title = image_data.title;
618 context.image_url = image_data.url;
619 context.image_link = image_data.link;
620
621 apply_custom_handlers(
622 &context.current_element,
623 &text,
624 &context.current_attributes,
625 config,
626 )?;
627
628 Ok(())
629}
630
631fn apply_custom_handlers(
642 element: &str,
643 text: &str,
644 attributes: &[(String, String)],
645 config: Option<&ParserConfig>,
646) -> Result<()> {
647 if let Some(cfg) = config {
648 for handler in &cfg.custom_handlers {
649 handler.handle_element(element, text, attributes)?;
650 }
651 }
652 Ok(())
653}
654
/// Coarse RSS dialect of the document being parsed.
// `dead_code` is allowed so the build stays warning-free even if a
// variant is only ever matched on and not constructed.
#[allow(dead_code)]
enum RssVersionState {
    /// RSS 1.0.
    Rss1_0,
    /// Anything that is not RSS 1.0.
    Other,
}
661
662struct ParserContext {
664 rss_version: RssVersionState,
665 parsing_state: ParsingState,
666 current_element: String,
667 current_attributes: Vec<(String, String)>,
668 current_item: RssItem,
669 image_title: String,
670 image_url: String,
671 image_link: String,
672}
673
674impl ParserContext {
675 pub(crate) fn new() -> Self {
677 ParserContext {
678 rss_version: RssVersionState::Other,
679 parsing_state: ParsingState::None,
680 current_element: String::new(),
681 current_attributes: Vec::new(),
682 current_item: RssItem::new(),
683 image_title: String::new(),
684 image_url: String::new(),
685 image_link: String::new(),
686 }
687 }
688}
689
690impl std::fmt::Debug for ParserConfig {
691 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
692 f.debug_struct("ParserConfig")
693 .field(
694 "custom_handlers",
695 &format!("[{} handlers]", self.custom_handlers.len()),
696 )
697 .finish()
698 }
699}
700
#[cfg(test)]
mod tests {
    use super::*;
    use quick_xml::events::BytesCData;
    use quick_xml::events::BytesStart;
    use quick_xml::events::BytesText;
    use std::sync::Arc;

    /// Handler that accepts exactly `customElement` / `Custom content`
    /// and rejects everything else.
    struct MockElementHandler;

    impl ElementHandler for MockElementHandler {
        fn handle_element(
            &self,
            name: &str,
            text: &str,
            _attributes: &[(String, String)],
        ) -> Result<()> {
            if name == "customElement" && text == "Custom content" {
                Ok(())
            } else {
                Err(RssError::UnknownElement(name.into()))
            }
        }
    }

    /// Config holding a single [`MockElementHandler`].
    fn mock_config() -> ParserConfig {
        let handler = Arc::new(MockElementHandler);
        ParserConfig {
            custom_handlers: vec![handler],
        }
    }

    /// Applies `parse_channel_element` to a default `RssData`.
    fn channel_field(
        element: &str,
        text: &str,
        is_rss_1_0: bool,
    ) -> (RssData, Result<()>) {
        let mut rss_data = RssData::default();
        let outcome = parse_channel_element(
            &mut rss_data,
            element,
            text,
            is_rss_1_0,
        );
        (rss_data, outcome)
    }

    /// Applies `parse_item_element` to a default `RssItem`.
    fn item_field(
        element: &str,
        text: &str,
        attributes: &[(String, String)],
    ) -> RssItem {
        let mut item = RssItem::default();
        parse_item_element(&mut item, element, text, attributes);
        item
    }

    /// Context positioned inside `state` with `element` as current element.
    fn context_at(state: ParsingState, element: &str) -> ParserContext {
        let mut context = ParserContext::new();
        context.parsing_state = state;
        context.current_element = element.to_string();
        context
    }

    // ---- ParserConfig ----

    #[test]
    fn test_parser_config_with_custom_handler() {
        let config = mock_config();

        assert_eq!(config.custom_handlers.len(), 1);
        assert!(config.custom_handlers[0]
            .handle_element("customElement", "Custom content", &[])
            .is_ok());
    }

    #[test]
    fn test_parser_config_no_custom_handlers() {
        let config = ParserConfig::default();
        assert!(config.custom_handlers.is_empty());
    }

    // ---- event processors on a fresh context ----

    #[test]
    fn test_process_start_event_empty_name() {
        let e = BytesStart::new("");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_ok());
    }

    #[test]
    fn test_process_start_event_non_empty_name() {
        let e = BytesStart::new("item");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_ok());
        assert_eq!(context.current_element, "item");
    }

    #[test]
    fn test_process_text_event() {
        let e = BytesText::from_escaped("Sample Text");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
    }

    #[test]
    fn test_process_cdata_event() {
        let e = BytesCData::new("Sample CDATA");
        let mut context = ParserContext::new();
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
    }

    // ---- parse_channel_element: structural / unknown elements ----

    #[test]
    fn test_parse_channel_rdf_li_rss_1_0() {
        let (_, result) = channel_field("rdf:li", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_rdf_li_non_rss_1_0() {
        let (_, result) = channel_field("rdf:li", "", false);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_channel_unknown_element() {
        let (_, result) = channel_field("unknownElement", "", false);
        assert!(result.is_err());
    }

    // ---- parse_rss: whole documents ----

    #[test]
    fn test_parse_rss_with_image() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Sample Feed</title>
                    <link>https://example.com</link>
                    <description>A sample RSS feed</description>
                    <image>
                        <title>Sample Image</title>
                        <url>https://example.com/image.jpg</url>
                        <link>https://example.com</link>
                    </image>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
                assert_eq!(parsed_data.image_title, "Sample Image");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS with image: {e:?}"),
        }
    }

    #[test]
    fn test_parse_rss_1_0() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
                     xmlns="http://purl.org/rss/1.0/">
                <channel rdf:about="https://example.com">
                    <title>Sample Feed</title>
                    <link>https://example.com</link>
                    <description>A sample RSS feed</description>
                </channel>
            </rdf:RDF>
        "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS 1.0: {e:?}"),
        }
    }

    #[test]
    fn test_parse_rss_2_0() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Sample Feed</title>
                    <link>https://example.com</link>
                    <description>A sample RSS feed</description>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);

        match result {
            Ok(parsed_data) => {
                assert_eq!(parsed_data.title, "Sample Feed");
            }
            Err(RssError::UnknownElement(element)) => {
                panic!("Failed due to unknown element: {element:?}");
            }
            Err(e) => panic!("Failed to parse RSS 2.0: {e:?}"),
        }
    }

    // ---- parse_channel_element: one test per stored field ----

    #[test]
    fn test_parse_channel_language() {
        let (rss_data, result) =
            channel_field("language", "en-US", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.language, "en-US");
    }

    #[test]
    fn test_parse_channel_copyright() {
        let (rss_data, result) =
            channel_field("copyright", "© 2024", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.copyright, "© 2024");
    }

    #[test]
    fn test_parse_channel_managing_editor() {
        let (rss_data, result) = channel_field(
            "managingEditor",
            "editor@example.com",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.managing_editor, "editor@example.com");
    }

    #[test]
    fn test_parse_channel_webmaster() {
        let (rss_data, result) =
            channel_field("webMaster", "webmaster@example.com", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.webmaster, "webmaster@example.com");
    }

    #[test]
    fn test_parse_channel_pub_date() {
        let (rss_data, result) = channel_field(
            "pubDate",
            "Mon, 10 Oct 2024 04:00:00 GMT",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(rss_data.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
    }

    #[test]
    fn test_parse_channel_last_build_date() {
        let (rss_data, result) = channel_field(
            "lastBuildDate",
            "Mon, 10 Oct 2024 05:00:00 GMT",
            false,
        );
        assert!(result.is_ok());
        assert_eq!(
            rss_data.last_build_date,
            "Mon, 10 Oct 2024 05:00:00 GMT"
        );
    }

    #[test]
    fn test_parse_channel_category() {
        let (rss_data, result) =
            channel_field("category", "Technology", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.category, "Technology");
    }

    #[test]
    fn test_parse_channel_generator() {
        let (rss_data, result) =
            channel_field("generator", "RSS Generator v1.0", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.generator, "RSS Generator v1.0");
    }

    #[test]
    fn test_parse_channel_docs() {
        let (rss_data, result) =
            channel_field("docs", "https://example.com/rss/docs", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.docs, "https://example.com/rss/docs");
    }

    #[test]
    fn test_parse_channel_ttl() {
        let (rss_data, result) = channel_field("ttl", "60", false);
        assert!(result.is_ok());
        assert_eq!(rss_data.ttl, "60");
    }

    #[test]
    fn test_parse_channel_items_rss_1_0() {
        let (_, result) = channel_field("items", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_items_non_rss_1_0() {
        let (_, result) = channel_field("items", "", false);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_channel_rdf_seq_rss_1_0() {
        let (_, result) = channel_field("rdf:Seq", "", true);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_channel_rdf_seq_non_rss_1_0() {
        let (_, result) = channel_field("rdf:Seq", "", false);
        assert!(result.is_err());
    }

    // ---- parse_item_element ----

    #[test]
    fn test_parse_item_author() {
        let item = item_field("author", "author@example.com", &[]);
        assert_eq!(item.author, "author@example.com");
    }

    #[test]
    fn test_parse_item_guid() {
        let item = item_field("guid", "1234-5678", &[]);
        assert_eq!(item.guid, "1234-5678");
    }

    #[test]
    fn test_parse_item_pub_date() {
        let item =
            item_field("pubDate", "Mon, 10 Oct 2024 04:00:00 GMT", &[]);
        assert_eq!(item.pub_date, "Mon, 10 Oct 2024 04:00:00 GMT");
    }

    #[test]
    fn test_parse_item_category() {
        let item = item_field("category", "Technology", &[]);
        assert_eq!(item.category, Some("Technology".to_string()));
    }

    #[test]
    fn test_parse_item_comments() {
        let item =
            item_field("comments", "https://example.com/comments", &[]);
        assert_eq!(
            item.comments,
            Some("https://example.com/comments".to_string())
        );
    }

    #[test]
    fn test_parse_item_enclosure_with_attributes() {
        let attributes = vec![
            (
                "url".to_string(),
                "https://example.com/audio.mp3".to_string(),
            ),
            ("length".to_string(), "123456".to_string()),
            ("type".to_string(), "audio/mpeg".to_string()),
        ];
        let item = item_field("enclosure", "", &attributes);
        assert_eq!(
            item.enclosure,
            Some("url=\"https://example.com/audio.mp3\" length=\"123456\" type=\"audio/mpeg\"".to_string())
        );
    }

    #[test]
    fn test_parse_item_enclosure_without_attributes() {
        let item = item_field("enclosure", "", &[]);
        assert_eq!(item.enclosure, None);
    }

    #[test]
    fn test_parse_item_source() {
        let item = item_field("source", "https://example.com", &[]);
        assert_eq!(
            item.source,
            Some("https://example.com".to_string())
        );
    }

    // ---- text / CDATA events inside a container ----

    #[test]
    fn test_process_text_event_in_channel() {
        let e = BytesText::from_escaped("Channel Title");
        let mut context = context_at(ParsingState::Channel, "title");
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(rss_data.title, "Channel Title");
    }

    #[test]
    fn test_process_text_event_in_item() {
        let e = BytesText::from_escaped("Item Title");
        let mut context = context_at(ParsingState::Item, "title");
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(context.current_item.title, "Item Title");
    }

    #[test]
    fn test_process_cdata_event_in_channel() {
        let e = BytesCData::new("CDATA Description");
        let mut context =
            context_at(ParsingState::Channel, "description");
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(rss_data.description, "CDATA Description");
    }

    #[test]
    fn test_process_cdata_event_in_item() {
        let e = BytesCData::new("CDATA Item Desc");
        let mut context = context_at(ParsingState::Item, "description");
        let mut rss_data = RssData::default();

        let result =
            process_cdata_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(context.current_item.description, "CDATA Item Desc");
    }

    #[test]
    fn test_process_text_event_with_custom_handler() {
        let config = mock_config();

        let e = BytesText::from_escaped("Custom content");
        // Outside every container, so only the custom handler sees it.
        let mut context = context_at(ParsingState::None, "customElement");
        let mut rss_data = RssData::default();

        let result = process_text_event(
            &e,
            &mut context,
            &mut rss_data,
            Some(&config),
        );
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_rss_with_cdata() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>CDATA Feed</title>
                    <link>https://example.com</link>
                    <description><![CDATA[A feed with <b>CDATA</b> content]]></description>
                    <item>
                        <title><![CDATA[CDATA Item]]></title>
                        <link>https://example.com/item1</link>
                        <description><![CDATA[Item with <em>HTML</em>]]></description>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "CDATA Feed");
        assert!(data.description.contains("CDATA"));
        assert_eq!(data.items.len(), 1);
        assert_eq!(data.items[0].title, "CDATA Item");
    }

    #[test]
    fn test_process_text_event_with_escaped_entities() {
        // The input must be *escaped* XML text: a bare `&` is an
        // unterminated entity and would make unescaping fail.
        let e = BytesText::from_escaped("&amp; &lt; &gt;");
        let mut context = context_at(ParsingState::Channel, "title");
        let mut rss_data = RssData::default();

        let result =
            process_text_event(&e, &mut context, &mut rss_data, None);
        assert!(result.is_ok());
        assert_eq!(rss_data.title, "& < >");
    }

    #[test]
    fn test_process_start_event_unknown_element_outside_context() {
        let e = BytesStart::new("unknownRoot");
        let mut context = ParserContext::new();
        context.parsing_state = ParsingState::None;
        let mut rss_data = RssData::default();

        let result =
            process_start_event(&e, &mut context, &mut rss_data);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_rss_with_all_channel_fields() {
        let rss_xml = r#"
            <?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Full Channel</title>
                    <link>https://example.com</link>
                    <description>A complete channel</description>
                    <language>en-US</language>
                    <copyright>2024</copyright>
                    <managingEditor>editor@example.com</managingEditor>
                    <webMaster>webmaster@example.com</webMaster>
                    <pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
                    <lastBuildDate>Mon, 01 Jan 2024 00:00:00 GMT</lastBuildDate>
                    <category>Technology</category>
                    <generator>Test Generator</generator>
                    <docs>https://example.com/docs</docs>
                    <ttl>60</ttl>
                    <item>
                        <title>Item 1</title>
                        <link>https://example.com/item1</link>
                        <description>First item</description>
                        <author>author@example.com</author>
                        <category>Cat1</category>
                        <comments>https://example.com/item1/comments</comments>
                        <source>https://example.com</source>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "Full Channel");
        assert_eq!(data.language, "en-US");
        assert_eq!(data.copyright, "2024");
        assert_eq!(data.managing_editor, "editor@example.com");
        assert_eq!(data.webmaster, "webmaster@example.com");
        assert_eq!(data.category, "Technology");
        assert_eq!(data.generator, "Test Generator");
        assert_eq!(data.docs, "https://example.com/docs");
        assert_eq!(data.ttl, "60");
        assert_eq!(data.items.len(), 1);
        assert_eq!(data.items[0].author, "author@example.com");
        assert_eq!(data.items[0].category, Some("Cat1".to_string()));
        assert_eq!(
            data.items[0].comments,
            Some("https://example.com/item1/comments".to_string())
        );
        assert_eq!(
            data.items[0].source,
            Some("https://example.com".to_string())
        );
    }

    #[test]
    fn test_parse_rss_malformed_xml() {
        let xml = "<rss><channel><title>Test</unclosed";
        let result = parse_rss(xml, None);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_rss_with_cdata_in_image() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Test Feed</title>
                    <link>https://example.com</link>
                    <description>Test</description>
                    <image>
                        <title><![CDATA[Image Title]]></title>
                        <url><![CDATA[https://example.com/image.png]]></url>
                        <link><![CDATA[https://example.com]]></link>
                    </image>
                    <item>
                        <title>Item 1</title>
                        <link>https://example.com/1</link>
                        <description>Desc</description>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.image_title, "Image Title");
        assert_eq!(data.image_url, "https://example.com/image.png");
        assert_eq!(data.image_link, "https://example.com");
    }

    #[test]
    fn test_parse_rss_with_cdata_in_item() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Test Feed</title>
                    <link>https://example.com</link>
                    <description>Test</description>
                    <item>
                        <title><![CDATA[CDATA Item Title]]></title>
                        <link>https://example.com/1</link>
                        <description><![CDATA[<p>HTML content</p>]]></description>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.items[0].title, "CDATA Item Title");
        assert!(data.items[0].description.contains("HTML content"));
    }

    #[test]
    fn test_process_text_event_with_failing_custom_handler() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Test Feed</title>
                    <link>https://example.com</link>
                    <description>Test</description>
                    <item>
                        <title>Item</title>
                        <link>https://example.com/1</link>
                        <description>Desc</description>
                        <unknownField>value</unknownField>
                    </item>
                </channel>
            </rss>
        "#;

        let config = mock_config();

        let result = parse_rss(rss_xml, Some(&config));
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_element_with_attributes() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title>Test Feed</title>
                    <link>https://example.com</link>
                    <description>Test</description>
                    <item>
                        <title>Item</title>
                        <link href="https://example.com/1">https://example.com/1</link>
                        <description>Desc</description>
                        <enclosure url="https://example.com/audio.mp3" length="12345" type="audio/mpeg"/>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
    }

    #[test]
    fn test_cdata_event_channel_elements() {
        let rss_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
            <rss version="2.0">
                <channel>
                    <title><![CDATA[CDATA Channel Title]]></title>
                    <link>https://example.com</link>
                    <description><![CDATA[CDATA Description]]></description>
                    <item>
                        <title>Item</title>
                        <link>https://example.com/1</link>
                        <description>Desc</description>
                    </item>
                </channel>
            </rss>
        "#;

        let result = parse_rss(rss_xml, None);
        assert!(result.is_ok());
        let data = result.unwrap();
        assert_eq!(data.title, "CDATA Channel Title");
        assert_eq!(data.description, "CDATA Description");
    }
}