1use crate::data::{RssData, RssVersion};
10use crate::error::{Result, RssError, ValidationError};
11use dtt::datetime::DateTime;
12use url::Url;
13
/// Upper bound on the length of a URL accepted by `validate_url`.
///
/// The limit is compared against `str::len`, i.e. it is measured in bytes.
const MAX_URL_LENGTH: usize = 2000;
16
/// Validator for an [`RssData`] feed.
///
/// The validator only borrows the feed; it never takes ownership of it.
#[derive(Debug)]
pub struct RssFeedValidator<'a> {
    /// The feed being validated.
    rss_data: &'a RssData,
}
22
23impl<'a> RssFeedValidator<'a> {
24 #[must_use]
34 pub fn new(rss_data: &'a RssData) -> Self {
35 RssFeedValidator { rss_data }
36 }
37
38 pub fn validate(&self) -> Result<()> {
52 let mut errors = Vec::new();
53
54 self.validate_rss_data(&mut errors);
55 self.validate_structure(&mut errors);
56 self.validate_items(&mut errors);
57 self.validate_dates(&mut errors);
58 self.validate_version_specific(&mut errors);
59
60 if errors.is_empty() {
61 Ok(())
62 } else {
63 Err(RssError::ValidationErrors(
64 errors.into_iter().map(|e| e.to_string()).collect(),
65 ))
66 }
67 }
68
69 fn validate_rss_data(&self, errors: &mut Vec<ValidationError>) {
71 if let Err(e) = self.rss_data.validate() {
72 errors.push(ValidationError {
73 field: "rss_data".to_string(),
74 message: e.to_string(),
75 });
76 }
77 }
78
79 fn validate_structure(&self, errors: &mut Vec<ValidationError>) {
81 Self::validate_url(&self.rss_data.link, "channel link", errors);
82
83 for (index, item) in self.rss_data.items.iter().enumerate() {
84 Self::validate_url(
85 &item.link,
86 &format!("item[{index}] link"),
87 errors,
88 );
89 }
90
91 if self.rss_data.items.is_empty() {
92 errors.push(ValidationError {
93 field: "items".to_string(),
94 message: "RSS feed must contain at least one item"
95 .to_string(),
96 });
97 }
98
99 self.validate_guids(errors);
100 self.validate_atom_link(errors);
101 }
102
103 fn validate_guids(&self, errors: &mut Vec<ValidationError>) {
105 let mut guids = std::collections::HashSet::new();
106 for item in &self.rss_data.items {
107 if !guids.insert(&item.guid) {
108 errors.push(ValidationError {
109 field: "guid".to_string(),
110 message: format!(
111 "Duplicate GUID found: {}",
112 item.guid
113 ),
114 });
115 }
116 }
117 }
118
119 fn validate_atom_link(&self, errors: &mut Vec<ValidationError>) {
121 if self.rss_data.version == RssVersion::RSS2_0
122 && self.rss_data.atom_link.is_empty()
123 {
124 errors.push(ValidationError {
125 field: "atom_link".to_string(),
126 message: "atom:link is required for RSS 2.0 feeds"
127 .to_string(),
128 });
129 }
130 }
131
132 fn validate_items(&self, errors: &mut Vec<ValidationError>) {
134 for (index, item) in self.rss_data.items.iter().enumerate() {
135 if let Err(e) = item.validate() {
136 errors.push(ValidationError {
137 field: format!("item[{index}]"),
138 message: format!("Item validation failed: {e}"),
139 });
140 }
141 }
142 }
143
144 fn validate_dates(&self, errors: &mut Vec<ValidationError>) {
146 Self::validate_date(&self.rss_data.pub_date, "pubDate", errors);
147 Self::validate_date(
148 &self.rss_data.last_build_date,
149 "lastBuildDate",
150 errors,
151 );
152
153 for (index, item) in self.rss_data.items.iter().enumerate() {
154 Self::validate_date(
155 &item.pub_date,
156 &format!("item[{index}].pubDate"),
157 errors,
158 );
159 }
160 }
161
162 fn validate_date(
164 date_str: &str,
165 field: &str,
166 errors: &mut Vec<ValidationError>,
167 ) {
168 if !date_str.is_empty() {
169 if let Err(e) = Self::parse_date(date_str) {
170 errors.push(ValidationError {
171 field: field.to_string(),
172 message: format!("Invalid date format: {e}"),
173 });
174 }
175 }
176 }
177
178 pub fn parse_date(date_str: &str) -> Result<DateTime> {
192 let rss_date_format = "[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second]";
193 let date_without_gmt =
194 date_str.strip_suffix(" GMT").ok_or_else(|| {
195 RssError::DateParseError(format!(
196 "Invalid date format (missing GMT): {date_str}"
197 ))
198 })?;
199
200 let date = DateTime::parse_custom_format(
201 date_without_gmt,
202 rss_date_format,
203 )
204 .map_err(|_| {
205 RssError::DateParseError(format!(
206 "Failed to parse date: {date_str}"
207 ))
208 })?;
209
210 Ok(date)
211 }
212
213 fn validate_version_specific(
215 &self,
216 errors: &mut Vec<ValidationError>,
217 ) {
218 match self.rss_data.version {
219 RssVersion::RSS2_0 => {
220 if self.rss_data.generator.is_empty() {
221 errors.push(ValidationError {
222 field: "generator".to_string(),
223 message:
224 "generator is recommended for RSS 2.0 feeds"
225 .to_string(),
226 });
227 }
228 if self.rss_data.atom_link.is_empty() {
229 errors.push(ValidationError {
230 field: "atom_link".to_string(),
231 message:
232 "atom:link is required for RSS 2.0 feeds"
233 .to_string(),
234 });
235 }
236 }
237 RssVersion::RSS1_0 => {
238 if self
239 .rss_data
240 .items
241 .iter()
242 .any(|item| item.guid.is_empty())
243 {
244 errors.push(ValidationError {
245 field: "guid".to_string(),
246 message:
247 "All items must have a guid in RSS 1.0"
248 .to_string(),
249 });
250 }
251 }
252 _ => {}
253 }
254 }
255
256 fn validate_url(
264 url: &str,
265 field: &str,
266 errors: &mut Vec<ValidationError>,
267 ) {
268 if url.len() > MAX_URL_LENGTH {
269 errors.push(ValidationError {
270 field: field.to_string(),
271 message: format!(
272 "URL exceeds maximum length of {MAX_URL_LENGTH} characters"
273 ),
274 });
275 return;
276 }
277
278 match Url::parse(url) {
279 Ok(parsed_url) => {
280 if parsed_url.scheme() != "http"
281 && parsed_url.scheme() != "https"
282 {
283 errors.push(ValidationError {
284 field: field.to_string(),
285 message: format!("Invalid URL scheme in {field}: {url}. Only HTTP and HTTPS are allowed."),
286 });
287 }
288 }
289 Err(_) => {
290 errors.push(ValidationError {
291 field: field.to_string(),
292 message: format!("Invalid URL in {field}: {url}"),
293 });
294 }
295 }
296 }
297}
298
299pub fn validate_rss_feed(rss_data: &RssData) -> Result<()> {
314 let validator = RssFeedValidator::new(rss_data);
315 validator.validate()
316}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::RssItem;

    /// atom:link value shared by the feeds built in these tests.
    const FEED_URL: &str = "https://example.com/feed.xml";
    /// A date string in the format `parse_date` accepts.
    const VALID_DATE: &str = "Mon, 01 Jan 2024 00:00:00 GMT";

    /// Feed of the given version with the title, link and description used
    /// throughout these tests and nothing else set.
    fn base_feed(version: RssVersion) -> RssData {
        RssData::new(Some(version))
            .title("Test Feed")
            .link("https://example.com")
            .description("A test feed")
    }

    /// Item with title, link, description and GUID set, but no date.
    fn sample_item() -> RssItem {
        RssItem::new()
            .title("Test Item")
            .link("https://example.com/item1")
            .description("A test item")
            .guid("unique-id-1")
    }

    /// RSS 2.0 feed with atom:link, pubDate, generator and one dated item.
    fn complete_feed() -> RssData {
        let mut feed = base_feed(RssVersion::RSS2_0)
            .atom_link(FEED_URL)
            .pub_date(VALID_DATE)
            .generator("RSS Gen Test");
        feed.add_item(sample_item().pub_date(VALID_DATE));
        feed
    }

    #[test]
    fn test_valid_rss_feed() {
        let feed = complete_feed();
        assert!(RssFeedValidator::new(&feed).validate().is_ok());
    }

    #[test]
    fn test_invalid_rss_feed() {
        let feed = base_feed(RssVersion::RSS2_0).pub_date("Invalid Date");

        let outcome = RssFeedValidator::new(&feed).validate();
        assert!(outcome.is_err());
        match outcome {
            Err(RssError::ValidationErrors(messages)) => {
                let mentions = |needle: &str| {
                    messages.iter().any(|m| m.contains(needle))
                };
                assert!(mentions("atom:link is required"));
                assert!(mentions(
                    "RSS feed must contain at least one item"
                ));
                assert!(mentions("Invalid date format"));
            }
            _ => panic!("Expected ValidationErrors"),
        }
    }

    #[test]
    fn test_validate_url_valid() {
        let feed = RssData::new(None);
        let mut errors = Vec::new();

        for url in [
            "https://example.com",
            "http://example.com",
            "https://sub.example.com/path?query=value",
        ] {
            RssFeedValidator::validate_url(url, "test", &mut errors);
        }

        assert!(errors.is_empty());
        assert!(feed.link.is_empty());
    }

    #[test]
    fn test_validate_url_invalid() {
        let mut errors = Vec::new();

        for url in [
            "not a url",
            "ftp://example.com",
            "http://",
            "https://",
            "file:///path/to/file",
        ] {
            RssFeedValidator::validate_url(url, "test", &mut errors);
        }

        assert_eq!(errors.len(), 5);
    }

    #[test]
    fn test_validate_structure_with_urls() {
        let mut feed = base_feed(RssVersion::RSS2_0).atom_link(FEED_URL);
        feed.add_item(sample_item());

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_structure(&mut errors);
        assert!(errors.is_empty());

        // Break the channel link and validate again.
        feed.link = "not a url".to_string();
        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_structure(&mut errors);
        assert!(errors
            .iter()
            .any(|e| e.message.contains("Invalid URL")));
    }

    #[test]
    fn test_validate_version_specific_rss2_0() {
        let mut feed = base_feed(RssVersion::RSS2_0)
            .atom_link(FEED_URL)
            .generator("RSS Gen Test");

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed)
            .validate_version_specific(&mut errors);
        assert!(errors.is_empty());

        // Clear the generator.
        feed.generator = String::new();
        let mut errors = Vec::new();
        RssFeedValidator::new(&feed)
            .validate_version_specific(&mut errors);
        assert!(errors
            .iter()
            .any(|e| e.message.contains("generator is recommended")));

        // Clear the atom:link as well.
        feed.atom_link = String::new();
        let mut errors = Vec::new();
        RssFeedValidator::new(&feed)
            .validate_version_specific(&mut errors);
        assert!(errors
            .iter()
            .any(|e| e.message.contains("atom:link is required")));
    }

    #[test]
    fn test_validate_version_specific_rss1_0() {
        let mut feed = base_feed(RssVersion::RSS1_0);
        feed.add_item(sample_item());

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed)
            .validate_version_specific(&mut errors);
        assert!(errors.is_empty());

        // Clear the only item's GUID.
        feed.items[0].guid = String::new();
        let mut errors = Vec::new();
        RssFeedValidator::new(&feed)
            .validate_version_specific(&mut errors);
        assert!(errors.iter().any(|e| e
            .message
            .contains("All items must have a guid in RSS 1.0")));
    }

    #[test]
    fn test_validate_version_specific_older_versions() {
        let older_versions = [
            RssVersion::RSS0_90,
            RssVersion::RSS0_91,
            RssVersion::RSS0_92,
        ];

        for version in &older_versions {
            let feed = base_feed(*version);

            let mut errors = Vec::new();
            RssFeedValidator::new(&feed)
                .validate_version_specific(&mut errors);
            assert!(
                errors.is_empty(),
                "Unexpected errors for version {version:?}"
            );
        }
    }

    #[test]
    fn test_parse_date_valid() {
        assert!(RssFeedValidator::parse_date(VALID_DATE).is_ok());
    }

    #[test]
    fn test_parse_date_invalid() {
        assert!(RssFeedValidator::parse_date("Invalid Date").is_err());
    }

    #[test]
    fn test_validate_guids() {
        let mut feed = base_feed(RssVersion::RSS2_0);
        // The third item repeats the first item's GUID.
        for guid in ["guid1", "guid2", "guid1"] {
            feed.add_item(RssItem::new().guid(guid));
        }

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_guids(&mut errors);

        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("Duplicate GUID found: guid1"));
    }

    #[test]
    fn test_validate_atom_link() {
        let feed = base_feed(RssVersion::RSS2_0);

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_atom_link(&mut errors);

        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("atom:link is required for RSS 2.0 feeds"));

        let feed_with_atom = feed.atom_link(FEED_URL);
        let mut errors = Vec::new();
        RssFeedValidator::new(&feed_with_atom)
            .validate_atom_link(&mut errors);

        assert!(errors.is_empty());
    }

    #[test]
    fn test_validate_rss_data() {
        // No channel fields are set on this feed.
        let bare_feed = RssData::new(Some(RssVersion::RSS2_0));

        let mut errors = Vec::new();
        RssFeedValidator::new(&bare_feed).validate_rss_data(&mut errors);

        assert!(!errors.is_empty());
        assert!(errors[0].message.contains("Title is missing"));
    }

    #[test]
    fn test_validate_items_with_invalid_item() {
        let mut feed = base_feed(RssVersion::RSS2_0)
            .atom_link(FEED_URL)
            .generator("Test");
        // Only the GUID is set on this item.
        feed.add_item(RssItem::new().guid("guid1"));

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_items(&mut errors);

        assert!(!errors.is_empty(), "Expected item validation errors");
        assert!(errors[0].field.contains("item[0]"));
        assert!(errors[0].message.contains("Item validation failed"));
    }

    #[test]
    fn test_validate_dates_with_invalid_item_date() {
        let mut feed = base_feed(RssVersion::RSS2_0)
            .atom_link(FEED_URL)
            .pub_date(VALID_DATE)
            .generator("Test");
        feed.add_item(
            RssItem::new()
                .title("Item")
                .link("https://example.com/item")
                .description("Desc")
                .guid("guid1")
                .pub_date("not a valid date"),
        );

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_dates(&mut errors);

        assert!(!errors.is_empty(), "Expected date validation errors");
        assert!(errors
            .iter()
            .any(|e| e.field.contains("item[0].pubDate")));
    }

    #[test]
    fn test_validate_url_exceeds_max_length() {
        let oversized = format!(
            "https://example.com/{}",
            "a".repeat(MAX_URL_LENGTH)
        );

        let mut errors = Vec::new();
        RssFeedValidator::validate_url(&oversized, "test", &mut errors);

        assert_eq!(errors.len(), 1);
        assert!(errors[0]
            .message
            .contains("URL exceeds maximum length"));
    }

    #[test]
    fn test_validate_structure_with_invalid_item_link() {
        let mut feed = base_feed(RssVersion::RSS2_0).atom_link(FEED_URL);
        feed.add_item(
            RssItem::new()
                .title("Item")
                .link("not-a-valid-url")
                .description("Desc")
                .guid("guid1"),
        );

        let mut errors = Vec::new();
        RssFeedValidator::new(&feed).validate_structure(&mut errors);

        assert!(errors
            .iter()
            .any(|e| e.field.contains("item[0] link")));
    }

    #[test]
    fn test_parse_date_missing_gmt_suffix() {
        let outcome =
            RssFeedValidator::parse_date("Mon, 01 Jan 2024 00:00:00");
        assert!(outcome.is_err());
        match outcome {
            Err(RssError::DateParseError(msg)) => {
                assert!(msg.contains("missing GMT"));
            }
            _ => panic!("Expected DateParseError"),
        }
    }

    #[test]
    fn test_parse_date_invalid_format_with_gmt() {
        let outcome = RssFeedValidator::parse_date("not-a-date GMT");
        assert!(outcome.is_err());
        match outcome {
            Err(RssError::DateParseError(msg)) => {
                assert!(msg.contains("Failed to parse date"));
            }
            _ => panic!("Expected DateParseError"),
        }
    }

    #[test]
    fn test_validate_rss_feed_convenience_function() {
        let feed = complete_feed();
        assert!(validate_rss_feed(&feed).is_ok());
    }

    #[test]
    fn test_validate_rss_feed_convenience_function_invalid() {
        let bare_feed = RssData::new(Some(RssVersion::RSS2_0));
        assert!(validate_rss_feed(&bare_feed).is_err());
    }
}