1use crate::data::{
10 parse_date as parse_rss_date, validate_link_field, RssData,
11 RssVersion,
12};
13use crate::error::{Result, RssError, ValidationError};
14use time::OffsetDateTime;
15use url::Url;
16
/// Upper bound on the length of a URL accepted by `RssFeedValidator::validate_url`.
///
/// The limit is compared against `str::len`, i.e. the UTF-8 encoded length
/// in bytes.
const MAX_URL_LENGTH: usize = 2_000;
19
20#[derive(Debug)]
22pub struct RssFeedValidator<'a> {
23 rss_data: &'a RssData,
24}
25
26impl<'a> RssFeedValidator<'a> {
27 #[must_use]
37 pub fn new(rss_data: &'a RssData) -> Self {
38 RssFeedValidator { rss_data }
39 }
40
41 pub fn validate(&self) -> Result<()> {
55 let mut errors = Vec::new();
56
57 self.validate_rss_data(&mut errors);
58 self.validate_structure(&mut errors);
59 self.validate_items(&mut errors);
60 self.validate_dates(&mut errors);
61 self.validate_version_specific(&mut errors);
62
63 if errors.is_empty() {
64 Ok(())
65 } else {
66 Err(RssError::ValidationErrors(errors))
71 }
72 }
73
74 fn validate_rss_data(&self, errors: &mut Vec<ValidationError>) {
76 if let Err(e) = self.rss_data.validate() {
77 errors.push(ValidationError {
78 field: "rss_data".to_string(),
79 message: e.to_string(),
80 });
81 }
82 }
83
84 fn validate_structure(&self, errors: &mut Vec<ValidationError>) {
86 Self::validate_url(&self.rss_data.link, "channel.link", errors);
87
88 for (index, item) in self.rss_data.items.iter().enumerate() {
89 if item.link.is_empty() {
96 continue;
97 }
98 if let Err(e) = validate_link_field(&item.link) {
99 errors.push(ValidationError::new(
100 format!("item.{index}.link"),
101 format!("Invalid item.{index}.link: {e}"),
102 ));
103 }
104 }
105
106 if self.rss_data.items.is_empty() {
107 errors.push(ValidationError::new(
108 "items",
109 "RSS feed must contain at least one item",
110 ));
111 }
112
113 self.validate_guids(errors);
114 self.validate_atom_link(errors);
115 }
116
117 fn validate_guids(&self, errors: &mut Vec<ValidationError>) {
119 let mut guids = std::collections::HashSet::new();
120 for item in &self.rss_data.items {
121 if !guids.insert(&item.guid) {
122 errors.push(ValidationError::new(
123 "guid",
124 format!("Duplicate GUID found: {}", item.guid),
125 ));
126 }
127 }
128 }
129
130 fn validate_atom_link(&self, errors: &mut Vec<ValidationError>) {
132 if self.rss_data.version == RssVersion::RSS2_0
133 && self.rss_data.atom_link.is_empty()
134 {
135 errors.push(ValidationError::new(
136 "atom_link",
137 "atom:link is required for RSS 2.0 feeds",
138 ));
139 }
140 }
141
142 fn validate_items(&self, errors: &mut Vec<ValidationError>) {
144 for (index, item) in self.rss_data.items.iter().enumerate() {
145 if let Err(e) = item.validate() {
146 errors.push(ValidationError::new(
147 format!("item[{index}]"),
148 format!("Item validation failed: {e}"),
149 ));
150 }
151 }
152 }
153
154 fn validate_dates(&self, errors: &mut Vec<ValidationError>) {
156 Self::validate_date(&self.rss_data.pub_date, "pubDate", errors);
157 Self::validate_date(
158 &self.rss_data.last_build_date,
159 "lastBuildDate",
160 errors,
161 );
162
163 for (index, item) in self.rss_data.items.iter().enumerate() {
164 Self::validate_date(
165 &item.pub_date,
166 &format!("item[{index}].pubDate"),
167 errors,
168 );
169 }
170 }
171
172 fn validate_date(
174 date_str: &str,
175 field: &str,
176 errors: &mut Vec<ValidationError>,
177 ) {
178 if !date_str.is_empty() {
179 if let Err(e) = Self::parse_date(date_str) {
180 errors.push(ValidationError::new(
181 field,
182 format!("Invalid date format: {e}"),
183 ));
184 }
185 }
186 }
187
188 pub fn parse_date(date_str: &str) -> Result<OffsetDateTime> {
202 parse_rss_date(date_str)
203 }
204
205 fn validate_version_specific(
207 &self,
208 errors: &mut Vec<ValidationError>,
209 ) {
210 match self.rss_data.version {
211 RssVersion::RSS2_0 => {
212 if self.rss_data.generator.is_empty() {
213 errors.push(ValidationError {
214 field: "generator".to_string(),
215 message:
216 "generator is recommended for RSS 2.0 feeds"
217 .to_string(),
218 });
219 }
220 if self.rss_data.atom_link.is_empty() {
221 errors.push(ValidationError {
222 field: "atom_link".to_string(),
223 message:
224 "atom:link is required for RSS 2.0 feeds"
225 .to_string(),
226 });
227 }
228 }
229 RssVersion::RSS1_0
230 if self
231 .rss_data
232 .items
233 .iter()
234 .any(|item| item.guid.is_empty()) =>
235 {
236 errors.push(ValidationError {
237 field: "guid".to_string(),
238 message: "All items must have a guid in RSS 1.0"
239 .to_string(),
240 });
241 }
242 _ => {}
243 }
244 }
245
246 fn validate_url(
254 url: &str,
255 field: &str,
256 errors: &mut Vec<ValidationError>,
257 ) {
258 if url.len() > MAX_URL_LENGTH {
259 errors.push(ValidationError {
260 field: field.to_string(),
261 message: format!(
262 "URL exceeds maximum length of {MAX_URL_LENGTH} characters"
263 ),
264 });
265 return;
266 }
267
268 match Url::parse(url) {
269 Ok(parsed_url) => {
270 if parsed_url.scheme() != "http"
271 && parsed_url.scheme() != "https"
272 {
273 errors.push(ValidationError {
274 field: field.to_string(),
275 message: format!("Invalid URL scheme in {field}: {url}. Only HTTP and HTTPS are allowed."),
276 });
277 }
278 }
279 Err(_) => {
280 errors.push(ValidationError {
281 field: field.to_string(),
282 message: format!("Invalid URL in {field}: {url}"),
283 });
284 }
285 }
286 }
287}
288
289pub fn validate_rss_feed(rss_data: &RssData) -> Result<()> {
304 let validator = RssFeedValidator::new(rss_data);
305 validator.validate()
306}
307
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::RssItem;

    /// A date string the parser is expected to accept.
    const VALID_DATE: &str = "Mon, 01 Jan 2024 00:00:00 GMT";

    /// The atom:link used by fixtures that need one.
    const ATOM_LINK: &str = "https://example.com/feed.xml";

    /// Channel of the given version with title, link and description set.
    fn channel(version: RssVersion) -> RssData {
        RssData::new(Some(version))
            .title("Test Feed")
            .link("https://example.com")
            .description("A test feed")
    }

    /// RSS 2.0 channel without atom:link, generator, dates or items.
    fn rss2_channel() -> RssData {
        channel(RssVersion::RSS2_0)
    }

    /// RSS 2.0 channel with an atom:link but no generator, dates or items.
    fn rss2_channel_with_atom_link() -> RssData {
        rss2_channel().atom_link(ATOM_LINK)
    }

    /// Item with title, link, description and GUID, but no date.
    fn sample_item() -> RssItem {
        RssItem::new()
            .title("Test Item")
            .link("https://example.com/item1")
            .description("A test item")
            .guid("unique-id-1")
    }

    /// RSS 2.0 feed with one dated item that is expected to validate cleanly.
    fn complete_feed() -> RssData {
        let mut feed = rss2_channel_with_atom_link()
            .pub_date(VALID_DATE)
            .generator("RSS Gen Test");
        feed.add_item(sample_item().pub_date(VALID_DATE));
        feed
    }

    /// Runs a single validation pass on a fresh validator for `rss_data` and
    /// returns the errors that pass collected.
    fn errors_from<'a, F>(rss_data: &'a RssData, pass: F) -> Vec<ValidationError>
    where
        F: FnOnce(&RssFeedValidator<'a>, &mut Vec<ValidationError>),
    {
        let validator = RssFeedValidator::new(rss_data);
        let mut errors = Vec::new();
        pass(&validator, &mut errors);
        errors
    }

    /// True when some error has exactly `field` and a message containing
    /// `fragment`.
    fn has_error(
        errors: &[ValidationError],
        field: &str,
        fragment: &str,
    ) -> bool {
        errors
            .iter()
            .any(|e| e.field == field && e.message.contains(fragment))
    }

    /// True when some error message contains `fragment`.
    fn has_message(errors: &[ValidationError], fragment: &str) -> bool {
        errors.iter().any(|e| e.message.contains(fragment))
    }

    #[test]
    fn test_valid_rss_feed() {
        let rss_data = complete_feed();

        assert!(RssFeedValidator::new(&rss_data).validate().is_ok());
    }

    #[test]
    fn test_invalid_rss_feed() {
        // No atom:link, no items and an unparsable channel date.
        let rss_data = rss2_channel().pub_date("Invalid Date");

        let outcome = RssFeedValidator::new(&rss_data).validate();
        assert!(outcome.is_err());

        match outcome {
            Err(RssError::ValidationErrors(errors)) => {
                assert!(has_error(
                    &errors,
                    "atom_link",
                    "atom:link is required"
                ));
                assert!(has_error(
                    &errors,
                    "items",
                    "RSS feed must contain at least one item"
                ));
                assert!(has_error(
                    &errors,
                    "pubDate",
                    "Invalid date format"
                ));
            }
            _ => panic!("Expected ValidationErrors"),
        }
    }

    #[test]
    fn test_validate_url_valid() {
        let rss_data = RssData::new(None);
        let mut errors = Vec::new();

        for url in [
            "https://example.com",
            "http://example.com",
            "https://sub.example.com/path?query=value",
        ] {
            RssFeedValidator::validate_url(url, "test", &mut errors);
        }

        assert!(errors.is_empty());
        assert!(rss_data.link.is_empty());
    }

    #[test]
    fn test_validate_url_invalid() {
        let mut errors = Vec::new();

        // Each of these must contribute exactly one error.
        for url in [
            "not a url",
            "ftp://example.com",
            "http://",
            "https://",
            "file:///path/to/file",
        ] {
            RssFeedValidator::validate_url(url, "test", &mut errors);
        }

        assert_eq!(errors.len(), 5);
    }

    #[test]
    fn test_validate_structure_with_urls() {
        let mut rss_data = rss2_channel_with_atom_link();
        rss_data.add_item(sample_item());

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_structure);
        assert!(errors.is_empty());

        // Break the channel link and validate again.
        rss_data.link = "not a url".to_string();
        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_structure);
        assert!(has_message(&errors, "Invalid URL"));
    }

    #[test]
    fn test_validate_version_specific_rss2_0() {
        let mut rss_data =
            rss2_channel_with_atom_link().generator("RSS Gen Test");

        let errors = errors_from(
            &rss_data,
            RssFeedValidator::validate_version_specific,
        );
        assert!(errors.is_empty());

        // Missing generator.
        rss_data.generator = String::new();
        let errors = errors_from(
            &rss_data,
            RssFeedValidator::validate_version_specific,
        );
        assert!(has_message(&errors, "generator is recommended"));

        // Missing atom:link as well.
        rss_data.atom_link = String::new();
        let errors = errors_from(
            &rss_data,
            RssFeedValidator::validate_version_specific,
        );
        assert!(has_message(&errors, "atom:link is required"));
    }

    #[test]
    fn test_validate_version_specific_rss1_0() {
        let mut rss_data = channel(RssVersion::RSS1_0);
        rss_data.add_item(sample_item());

        let errors = errors_from(
            &rss_data,
            RssFeedValidator::validate_version_specific,
        );
        assert!(errors.is_empty());

        // Remove the only item's GUID.
        rss_data.items[0].guid = String::new();
        let errors = errors_from(
            &rss_data,
            RssFeedValidator::validate_version_specific,
        );
        assert!(has_message(
            &errors,
            "All items must have a guid in RSS 1.0"
        ));
    }

    #[test]
    fn test_validate_version_specific_older_versions() {
        for version in [
            RssVersion::RSS0_90,
            RssVersion::RSS0_91,
            RssVersion::RSS0_92,
        ] {
            let rss_data = channel(version);

            let errors = errors_from(
                &rss_data,
                RssFeedValidator::validate_version_specific,
            );
            assert!(
                errors.is_empty(),
                "Unexpected errors for version {version:?}"
            );
        }
    }

    #[test]
    fn test_parse_date_valid() {
        assert!(RssFeedValidator::parse_date(
            "Mon, 01 Jan 2024 00:00:00 GMT"
        )
        .is_ok());
    }

    #[test]
    fn test_parse_date_invalid() {
        assert!(RssFeedValidator::parse_date("Invalid Date").is_err());
    }

    #[test]
    fn test_validate_guids() {
        let mut rss_data = rss2_channel();

        // The third GUID repeats the first one.
        for guid in ["guid1", "guid2", "guid1"] {
            rss_data.add_item(RssItem::new().guid(guid));
        }

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_guids);

        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("Duplicate GUID found: guid1"));
    }

    #[test]
    fn test_validate_atom_link() {
        let rss_data = rss2_channel();

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_atom_link);
        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("atom:link is required for RSS 2.0 feeds"));

        let rss_data_with_atom =
            rss_data.atom_link("https://example.com/feed.xml");
        let errors = errors_from(
            &rss_data_with_atom,
            RssFeedValidator::validate_atom_link,
        );
        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_rss_data() {
        // A channel with no fields set at all.
        let invalid_rss_data = RssData::new(Some(RssVersion::RSS2_0));

        let errors = errors_from(
            &invalid_rss_data,
            RssFeedValidator::validate_rss_data,
        );

        assert!(!errors.is_empty());
        let first = &errors[0].message;
        assert!(
            first.contains("channel.title is missing"),
            "expected `channel.title is missing`, got: {:?}",
            first
        );
    }

    #[test]
    fn test_validate_items_with_invalid_item() {
        let mut rss_data = rss2_channel_with_atom_link().generator("Test");

        // An item that only carries a GUID.
        rss_data.add_item(RssItem::new().guid("guid1"));

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_items);

        assert!(!errors.is_empty(), "Expected item validation errors");
        let first = &errors[0];
        assert!(first.field.contains("item[0]"));
        assert!(first.message.contains("Item validation failed"));
    }

    #[test]
    fn test_validate_dates_with_invalid_item_date() {
        let mut rss_data = rss2_channel_with_atom_link()
            .pub_date(VALID_DATE)
            .generator("Test");

        rss_data.add_item(
            RssItem::new()
                .title("Item")
                .link("https://example.com/item")
                .description("Desc")
                .guid("guid1")
                .pub_date("not a valid date"),
        );

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_dates);

        assert!(!errors.is_empty(), "Expected date validation errors");
        assert!(errors.iter().any(|e| e.field == "item[0].pubDate"));
    }

    #[test]
    fn test_validate_url_exceeds_max_length() {
        let mut long_url = String::from("https://example.com/");
        long_url.push_str(&"a".repeat(MAX_URL_LENGTH));

        let mut errors = Vec::new();
        RssFeedValidator::validate_url(&long_url, "test", &mut errors);

        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("URL exceeds maximum length"));
    }

    #[test]
    fn test_validate_structure_with_invalid_item_link() {
        let mut rss_data = rss2_channel_with_atom_link();

        rss_data.add_item(
            RssItem::new()
                .title("Item")
                .link("bad url with spaces")
                .description("Desc")
                .guid("guid1"),
        );

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_structure);

        assert!(has_error(
            &errors,
            "item.0.link",
            "Invalid item.0.link"
        ));
    }

    #[test]
    fn test_validate_structure_allows_empty_item_link() {
        let mut rss_data = rss2_channel_with_atom_link();

        // No `.link(...)` call: the item link stays empty.
        rss_data.add_item(
            RssItem::new()
                .title("Item")
                .description("Body only — no link")
                .guid("guid-no-link"),
        );

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_structure);

        assert!(
            errors.iter().all(|e| e.field != "item.0.link"),
            "empty item.link should be accepted, got: {errors:?}"
        );
    }

    #[test]
    fn test_validate_structure_allows_relative_item_link() {
        let mut rss_data = rss2_channel_with_atom_link();

        rss_data.add_item(
            RssItem::new()
                .title("Item")
                .link("/tags/")
                .description("Tag index")
                .guid("guid-tags"),
        );

        let errors =
            errors_from(&rss_data, RssFeedValidator::validate_structure);

        assert!(
            errors.iter().all(|e| e.field != "item.0.link"),
            "relative item.link should be accepted, got: {errors:?}"
        );
    }

    #[test]
    fn test_parse_date_accepts_numeric_timezone_offset() {
        for date in [
            "Sun, 28 Jun 2026 00:12:20 +0000",
            "Sat, 27 Jun 2026 19:12:20 -0500",
        ] {
            assert!(RssFeedValidator::parse_date(date).is_ok());
        }
    }

    #[test]
    fn test_parse_date_accepts_iso8601() {
        let parsed = RssFeedValidator::parse_date("2026-06-28T00:12:20Z");
        assert!(parsed.is_ok());
    }

    #[test]
    fn test_parse_date_no_longer_requires_gmt_suffix() {
        let parsed =
            RssFeedValidator::parse_date("Mon, 01 Jan 2024 00:00:00 +0000");
        assert!(parsed.is_ok());
    }

    #[test]
    fn test_validate_rss_feed_convenience_function() {
        let rss_data = complete_feed();

        assert!(validate_rss_feed(&rss_data).is_ok());
    }

    #[test]
    fn test_validate_rss_feed_convenience_function_invalid() {
        let empty_feed = RssData::new(Some(RssVersion::RSS2_0));

        assert!(validate_rss_feed(&empty_feed).is_err());
    }
}