Skip to main content

rss_gen/
validator.rs

1// Copyright © 2024 RSS Gen. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! RSS feed validator module
5//!
6//! This module provides functionality to validate RSS feeds, ensuring they
7//! conform to the specified RSS version standards and contain valid data.
8
9use crate::data::{RssData, RssVersion};
10use crate::error::{Result, RssError, ValidationError};
11use dtt::datetime::DateTime;
12use url::Url;
13
/// Upper bound on the length of a URL string accepted by the validator.
///
/// The limit is compared against `str::len`, i.e. the UTF-8 byte length of
/// the URL string.
const MAX_URL_LENGTH: usize = 2_000;
16
17/// RSS feed validator for validating the structure and content of an RSS feed.
18#[derive(Debug)]
19pub struct RssFeedValidator<'a> {
20    rss_data: &'a RssData,
21}
22
23impl<'a> RssFeedValidator<'a> {
24    /// Creates a new `RssFeedValidator` instance with the provided `RssData`.
25    ///
26    /// # Arguments
27    ///
28    /// * `rss_data` - A reference to the `RssData` to be validated.
29    ///
30    /// # Returns
31    ///
32    /// A new instance of `RssFeedValidator`.
33    #[must_use]
34    pub fn new(rss_data: &'a RssData) -> Self {
35        RssFeedValidator { rss_data }
36    }
37
38    /// Validates the RSS feed structure and content.
39    ///
40    /// This method performs a comprehensive validation of the RSS feed,
41    /// including structure, items, dates, and version-specific requirements.
42    ///
43    /// # Returns
44    ///
45    /// * `Ok(())` if the validation passes.
46    /// * `Err(RssError::ValidationErrors)` containing a list of validation errors if any are found.
47    ///
48    /// # Errors
49    ///
50    /// This function returns an `Err(RssError::ValidationErrors)` if any validation checks fail.
51    pub fn validate(&self) -> Result<()> {
52        let mut errors = Vec::new();
53
54        self.validate_rss_data(&mut errors);
55        self.validate_structure(&mut errors);
56        self.validate_items(&mut errors);
57        self.validate_dates(&mut errors);
58        self.validate_version_specific(&mut errors);
59
60        if errors.is_empty() {
61            Ok(())
62        } else {
63            Err(RssError::ValidationErrors(
64                errors.into_iter().map(|e| e.to_string()).collect(),
65            ))
66        }
67    }
68
69    /// Validates the base `RssData` structure.
70    fn validate_rss_data(&self, errors: &mut Vec<ValidationError>) {
71        if let Err(e) = self.rss_data.validate() {
72            errors.push(ValidationError {
73                field: "rss_data".to_string(),
74                message: e.to_string(),
75            });
76        }
77    }
78
79    /// Validates the overall structure of the RSS feed.
80    fn validate_structure(&self, errors: &mut Vec<ValidationError>) {
81        Self::validate_url(&self.rss_data.link, "channel link", errors);
82
83        for (index, item) in self.rss_data.items.iter().enumerate() {
84            Self::validate_url(
85                &item.link,
86                &format!("item[{index}] link"),
87                errors,
88            );
89        }
90
91        if self.rss_data.items.is_empty() {
92            errors.push(ValidationError {
93                field: "items".to_string(),
94                message: "RSS feed must contain at least one item"
95                    .to_string(),
96            });
97        }
98
99        self.validate_guids(errors);
100        self.validate_atom_link(errors);
101    }
102
103    /// Validates that all GUIDs in the feed are unique.
104    fn validate_guids(&self, errors: &mut Vec<ValidationError>) {
105        let mut guids = std::collections::HashSet::new();
106        for item in &self.rss_data.items {
107            if !guids.insert(&item.guid) {
108                errors.push(ValidationError {
109                    field: "guid".to_string(),
110                    message: format!(
111                        "Duplicate GUID found: {}",
112                        item.guid
113                    ),
114                });
115            }
116        }
117    }
118
119    /// Validates the presence of atom:link for RSS 2.0 feeds.
120    fn validate_atom_link(&self, errors: &mut Vec<ValidationError>) {
121        if self.rss_data.version == RssVersion::RSS2_0
122            && self.rss_data.atom_link.is_empty()
123        {
124            errors.push(ValidationError {
125                field: "atom_link".to_string(),
126                message: "atom:link is required for RSS 2.0 feeds"
127                    .to_string(),
128            });
129        }
130    }
131
132    /// Validates individual items in the RSS feed.
133    fn validate_items(&self, errors: &mut Vec<ValidationError>) {
134        for (index, item) in self.rss_data.items.iter().enumerate() {
135            if let Err(e) = item.validate() {
136                errors.push(ValidationError {
137                    field: format!("item[{index}]"),
138                    message: format!("Item validation failed: {e}"),
139                });
140            }
141        }
142    }
143
144    /// Validates all dates in the RSS feed.
145    fn validate_dates(&self, errors: &mut Vec<ValidationError>) {
146        Self::validate_date(&self.rss_data.pub_date, "pubDate", errors);
147        Self::validate_date(
148            &self.rss_data.last_build_date,
149            "lastBuildDate",
150            errors,
151        );
152
153        for (index, item) in self.rss_data.items.iter().enumerate() {
154            Self::validate_date(
155                &item.pub_date,
156                &format!("item[{index}].pubDate"),
157                errors,
158            );
159        }
160    }
161
162    /// Validates a single date string.
163    fn validate_date(
164        date_str: &str,
165        field: &str,
166        errors: &mut Vec<ValidationError>,
167    ) {
168        if !date_str.is_empty() {
169            if let Err(e) = Self::parse_date(date_str) {
170                errors.push(ValidationError {
171                    field: field.to_string(),
172                    message: format!("Invalid date format: {e}"),
173                });
174            }
175        }
176    }
177
178    /// Parses a date string into a `DateTime` object.
179    ///
180    /// # Arguments
181    ///
182    /// * `date_str` - The date string to parse.
183    ///
184    /// # Returns
185    ///
186    /// A `Result` containing the parsed `DateTime` object or an error if the parsing fails.
187    ///
188    /// # Errors
189    ///
190    /// This function returns an `Err(RssError::DateParseError)` if the date format is invalid.
191    pub fn parse_date(date_str: &str) -> Result<DateTime> {
192        let rss_date_format = "[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second]";
193        let date_without_gmt =
194            date_str.strip_suffix(" GMT").ok_or_else(|| {
195                RssError::DateParseError(format!(
196                    "Invalid date format (missing GMT): {date_str}"
197                ))
198            })?;
199
200        let date = DateTime::parse_custom_format(
201            date_without_gmt,
202            rss_date_format,
203        )
204        .map_err(|_| {
205            RssError::DateParseError(format!(
206                "Failed to parse date: {date_str}"
207            ))
208        })?;
209
210        Ok(date)
211    }
212
213    /// Validates version-specific requirements of the RSS feed.
214    fn validate_version_specific(
215        &self,
216        errors: &mut Vec<ValidationError>,
217    ) {
218        match self.rss_data.version {
219            RssVersion::RSS2_0 => {
220                if self.rss_data.generator.is_empty() {
221                    errors.push(ValidationError {
222                        field: "generator".to_string(),
223                        message:
224                            "generator is recommended for RSS 2.0 feeds"
225                                .to_string(),
226                    });
227                }
228                if self.rss_data.atom_link.is_empty() {
229                    errors.push(ValidationError {
230                        field: "atom_link".to_string(),
231                        message:
232                            "atom:link is required for RSS 2.0 feeds"
233                                .to_string(),
234                    });
235                }
236            }
237            RssVersion::RSS1_0 => {
238                if self
239                    .rss_data
240                    .items
241                    .iter()
242                    .any(|item| item.guid.is_empty())
243                {
244                    errors.push(ValidationError {
245                        field: "guid".to_string(),
246                        message:
247                            "All items must have a guid in RSS 1.0"
248                                .to_string(),
249                    });
250                }
251            }
252            _ => {}
253        }
254    }
255
256    /// Validates a URL string.
257    ///
258    /// # Arguments
259    ///
260    /// * `url` - The URL string to validate.
261    /// * `field` - The field name for error reporting.
262    /// * `errors` - A mutable vector to collect validation errors.
263    fn validate_url(
264        url: &str,
265        field: &str,
266        errors: &mut Vec<ValidationError>,
267    ) {
268        if url.len() > MAX_URL_LENGTH {
269            errors.push(ValidationError {
270                field: field.to_string(),
271                message: format!(
272                    "URL exceeds maximum length of {MAX_URL_LENGTH} characters"
273                ),
274            });
275            return;
276        }
277
278        match Url::parse(url) {
279            Ok(parsed_url) => {
280                if parsed_url.scheme() != "http"
281                    && parsed_url.scheme() != "https"
282                {
283                    errors.push(ValidationError {
284                        field: field.to_string(),
285                        message: format!("Invalid URL scheme in {field}: {url}. Only HTTP and HTTPS are allowed."),
286                    });
287                }
288            }
289            Err(_) => {
290                errors.push(ValidationError {
291                    field: field.to_string(),
292                    message: format!("Invalid URL in {field}: {url}"),
293                });
294            }
295        }
296    }
297}
298
299/// Validates the provided `RssData` and returns a `Result` indicating success or failure.
300///
301/// # Arguments
302///
303/// * `rss_data` - A reference to the `RssData` to be validated.
304///
305/// # Returns
306///
307/// * `Ok(())` if the validation passes.
308/// * `Err(RssError::ValidationErrors)` containing a list of validation errors if any are found.
309///
310/// # Errors
311///
312/// This function returns an `Err(RssError::ValidationErrors)` if any validation checks fail.
313pub fn validate_rss_feed(rss_data: &RssData) -> Result<()> {
314    let validator = RssFeedValidator::new(rss_data);
315    validator.validate()
316}
317
318#[cfg(test)]
319mod tests {
320    use super::*;
321    use crate::data::RssItem;
322
323    #[test]
324    fn test_valid_rss_feed() {
325        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
326            .title("Test Feed")
327            .link("https://example.com")
328            .description("A test feed")
329            .atom_link("https://example.com/feed.xml")
330            .pub_date("Mon, 01 Jan 2024 00:00:00 GMT")
331            .generator("RSS Gen Test");
332
333        rss_data.add_item(
334            RssItem::new()
335                .title("Test Item")
336                .link("https://example.com/item1")
337                .description("A test item")
338                .guid("unique-id-1")
339                .pub_date("Mon, 01 Jan 2024 00:00:00 GMT"),
340        );
341
342        let validator = RssFeedValidator::new(&rss_data);
343        assert!(validator.validate().is_ok());
344    }
345
346    #[test]
347    fn test_invalid_rss_feed() {
348        let rss_data = RssData::new(Some(RssVersion::RSS2_0))
349            .title("Test Feed")
350            .link("https://example.com")
351            .description("A test feed")
352            .pub_date("Invalid Date");
353
354        let validator = RssFeedValidator::new(&rss_data);
355        let result = validator.validate();
356        assert!(result.is_err());
357        if let Err(RssError::ValidationErrors(errors)) = result {
358            assert!(errors
359                .iter()
360                .any(|e| e.contains("atom:link is required")));
361            assert!(errors.iter().any(|e| e
362                .contains("RSS feed must contain at least one item")));
363            assert!(errors
364                .iter()
365                .any(|e| e.contains("Invalid date format")));
366        } else {
367            panic!("Expected ValidationErrors");
368        }
369    }
370
371    #[test]
372    fn test_validate_url_valid() {
373        let rss_data = RssData::new(None);
374        let mut errors = Vec::new();
375
376        RssFeedValidator::validate_url(
377            "https://example.com",
378            "test",
379            &mut errors,
380        );
381        RssFeedValidator::validate_url(
382            "http://example.com",
383            "test",
384            &mut errors,
385        );
386        RssFeedValidator::validate_url(
387            "https://sub.example.com/path?query=value",
388            "test",
389            &mut errors,
390        );
391
392        assert!(errors.is_empty());
393        assert!(rss_data.link.is_empty());
394    }
395
396    #[test]
397    fn test_validate_url_invalid() {
398        let mut errors = Vec::new();
399
400        RssFeedValidator::validate_url(
401            "not a url",
402            "test",
403            &mut errors,
404        );
405        RssFeedValidator::validate_url(
406            "ftp://example.com",
407            "test",
408            &mut errors,
409        );
410        RssFeedValidator::validate_url("http://", "test", &mut errors);
411        RssFeedValidator::validate_url("https://", "test", &mut errors);
412        RssFeedValidator::validate_url(
413            "file:///path/to/file",
414            "test",
415            &mut errors,
416        );
417
418        assert_eq!(errors.len(), 5);
419    }
420
421    #[test]
422    fn test_validate_structure_with_urls() {
423        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
424            .title("Test Feed")
425            .link("https://example.com")
426            .description("A test feed")
427            .atom_link("https://example.com/feed.xml");
428
429        rss_data.add_item(
430            RssItem::new()
431                .title("Test Item")
432                .link("https://example.com/item1")
433                .description("A test item")
434                .guid("unique-id-1"),
435        );
436
437        let validator = RssFeedValidator::new(&rss_data);
438        let mut errors = Vec::new();
439        validator.validate_structure(&mut errors);
440        assert!(errors.is_empty());
441
442        // Test with invalid URL
443        rss_data.link = "not a url".to_string();
444        let validator = RssFeedValidator::new(&rss_data);
445        let mut errors = Vec::new();
446        validator.validate_structure(&mut errors);
447        assert!(errors
448            .iter()
449            .any(|e| e.message.contains("Invalid URL")));
450    }
451
452    #[test]
453    fn test_validate_version_specific_rss2_0() {
454        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
455            .title("Test Feed")
456            .link("https://example.com")
457            .description("A test feed")
458            .atom_link("https://example.com/feed.xml")
459            .generator("RSS Gen Test");
460
461        let validator = RssFeedValidator::new(&rss_data);
462        let mut errors = Vec::new();
463        validator.validate_version_specific(&mut errors);
464        assert!(errors.is_empty());
465
466        // Test without generator
467        rss_data.generator = String::new();
468        let validator = RssFeedValidator::new(&rss_data);
469        let mut errors = Vec::new();
470        validator.validate_version_specific(&mut errors);
471        assert!(errors
472            .iter()
473            .any(|e| e.message.contains("generator is recommended")));
474
475        // Test without atom:link
476        rss_data.atom_link = String::new();
477        let validator = RssFeedValidator::new(&rss_data);
478        let mut errors = Vec::new();
479        validator.validate_version_specific(&mut errors);
480        assert!(errors
481            .iter()
482            .any(|e| e.message.contains("atom:link is required")));
483    }
484
485    #[test]
486    fn test_validate_version_specific_rss1_0() {
487        let mut rss_data = RssData::new(Some(RssVersion::RSS1_0))
488            .title("Test Feed")
489            .link("https://example.com")
490            .description("A test feed");
491
492        rss_data.add_item(
493            RssItem::new()
494                .title("Test Item")
495                .link("https://example.com/item1")
496                .description("A test item")
497                .guid("unique-id-1"),
498        );
499
500        let validator = RssFeedValidator::new(&rss_data);
501        let mut errors = Vec::new();
502        validator.validate_version_specific(&mut errors);
503        assert!(errors.is_empty());
504
505        // Test without guid
506        rss_data.items[0].guid = String::new();
507        let validator = RssFeedValidator::new(&rss_data);
508        let mut errors = Vec::new();
509        validator.validate_version_specific(&mut errors);
510        assert!(errors.iter().any(|e| e
511            .message
512            .contains("All items must have a guid in RSS 1.0")));
513    }
514
515    #[test]
516    fn test_validate_version_specific_older_versions() {
517        for version in &[
518            RssVersion::RSS0_90,
519            RssVersion::RSS0_91,
520            RssVersion::RSS0_92,
521        ] {
522            let rss_data = RssData::new(Some(*version))
523                .title("Test Feed")
524                .link("https://example.com")
525                .description("A test feed");
526
527            let validator = RssFeedValidator::new(&rss_data);
528            let mut errors = Vec::new();
529            validator.validate_version_specific(&mut errors);
530            assert!(
531                errors.is_empty(),
532                "Unexpected errors for version {version:?}"
533            );
534        }
535    }
536
537    #[test]
538    fn test_parse_date_valid() {
539        let valid_date = "Mon, 01 Jan 2024 00:00:00 GMT";
540        assert!(RssFeedValidator::parse_date(valid_date).is_ok());
541    }
542
543    #[test]
544    fn test_parse_date_invalid() {
545        let invalid_date = "Invalid Date";
546        assert!(RssFeedValidator::parse_date(invalid_date).is_err());
547    }
548
549    #[test]
550    fn test_validate_guids() {
551        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
552            .title("Test Feed")
553            .link("https://example.com")
554            .description("A test feed");
555
556        rss_data.add_item(RssItem::new().guid("guid1"));
557        rss_data.add_item(RssItem::new().guid("guid2"));
558        rss_data.add_item(RssItem::new().guid("guid1")); // Duplicate
559
560        let validator = RssFeedValidator::new(&rss_data);
561        let mut errors = Vec::new();
562        validator.validate_guids(&mut errors);
563
564        assert_eq!(errors.len(), 1);
565        assert!(errors[0]
566            .message
567            .contains("Duplicate GUID found: guid1"));
568    }
569
570    #[test]
571    fn test_validate_atom_link() {
572        let rss_data = RssData::new(Some(RssVersion::RSS2_0))
573            .title("Test Feed")
574            .link("https://example.com")
575            .description("A test feed");
576
577        let validator = RssFeedValidator::new(&rss_data);
578        let mut errors = Vec::new();
579        validator.validate_atom_link(&mut errors);
580
581        assert_eq!(errors.len(), 1);
582        assert!(errors[0]
583            .message
584            .contains("atom:link is required for RSS 2.0 feeds"));
585
586        let rss_data_with_atom =
587            rss_data.atom_link("https://example.com/feed.xml");
588        let validator = RssFeedValidator::new(&rss_data_with_atom);
589        let mut errors = Vec::new();
590        validator.validate_atom_link(&mut errors);
591
592        assert!(errors.is_empty());
593    }
594
595    #[test]
596    fn test_validate_rss_data() {
597        let invalid_rss_data = RssData::new(Some(RssVersion::RSS2_0)); // Missing required fields
598
599        let validator = RssFeedValidator::new(&invalid_rss_data);
600        let mut errors = Vec::new();
601        validator.validate_rss_data(&mut errors);
602
603        assert!(!errors.is_empty());
604        assert!(errors[0].message.contains("Title is missing"));
605    }
606
607    #[test]
608    fn test_validate_items_with_invalid_item() {
609        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
610            .title("Test Feed")
611            .link("https://example.com")
612            .description("A test feed")
613            .atom_link("https://example.com/feed.xml")
614            .generator("Test");
615
616        // Add an item missing required fields (title, link, description)
617        rss_data.add_item(RssItem::new().guid("guid1"));
618
619        let validator = RssFeedValidator::new(&rss_data);
620        let mut errors = Vec::new();
621        validator.validate_items(&mut errors);
622
623        assert!(!errors.is_empty(), "Expected item validation errors");
624        assert!(errors[0].field.contains("item[0]"));
625        assert!(errors[0].message.contains("Item validation failed"));
626    }
627
628    #[test]
629    fn test_validate_dates_with_invalid_item_date() {
630        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
631            .title("Test Feed")
632            .link("https://example.com")
633            .description("A test feed")
634            .atom_link("https://example.com/feed.xml")
635            .pub_date("Mon, 01 Jan 2024 00:00:00 GMT")
636            .generator("Test");
637
638        rss_data.add_item(
639            RssItem::new()
640                .title("Item")
641                .link("https://example.com/item")
642                .description("Desc")
643                .guid("guid1")
644                .pub_date("not a valid date"),
645        );
646
647        let validator = RssFeedValidator::new(&rss_data);
648        let mut errors = Vec::new();
649        validator.validate_dates(&mut errors);
650
651        assert!(!errors.is_empty(), "Expected date validation errors");
652        assert!(errors
653            .iter()
654            .any(|e| e.field.contains("item[0].pubDate")));
655    }
656
657    #[test]
658    fn test_validate_url_exceeds_max_length() {
659        let mut errors = Vec::new();
660        let long_url = format!(
661            "https://example.com/{}",
662            "a".repeat(MAX_URL_LENGTH)
663        );
664
665        RssFeedValidator::validate_url(&long_url, "test", &mut errors);
666
667        assert_eq!(errors.len(), 1);
668        assert!(errors[0]
669            .message
670            .contains("URL exceeds maximum length"));
671    }
672
673    #[test]
674    fn test_validate_structure_with_invalid_item_link() {
675        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
676            .title("Test Feed")
677            .link("https://example.com")
678            .description("A test feed")
679            .atom_link("https://example.com/feed.xml");
680
681        rss_data.add_item(
682            RssItem::new()
683                .title("Item")
684                .link("not-a-valid-url")
685                .description("Desc")
686                .guid("guid1"),
687        );
688
689        let validator = RssFeedValidator::new(&rss_data);
690        let mut errors = Vec::new();
691        validator.validate_structure(&mut errors);
692
693        assert!(errors
694            .iter()
695            .any(|e| e.field.contains("item[0] link")));
696    }
697
698    #[test]
699    fn test_parse_date_missing_gmt_suffix() {
700        let result =
701            RssFeedValidator::parse_date("Mon, 01 Jan 2024 00:00:00");
702        assert!(result.is_err());
703        if let Err(RssError::DateParseError(msg)) = result {
704            assert!(msg.contains("missing GMT"));
705        } else {
706            panic!("Expected DateParseError");
707        }
708    }
709
710    #[test]
711    fn test_parse_date_invalid_format_with_gmt() {
712        let result = RssFeedValidator::parse_date("not-a-date GMT");
713        assert!(result.is_err());
714        if let Err(RssError::DateParseError(msg)) = result {
715            assert!(msg.contains("Failed to parse date"));
716        } else {
717            panic!("Expected DateParseError");
718        }
719    }
720
721    #[test]
722    fn test_validate_rss_feed_convenience_function() {
723        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
724            .title("Test Feed")
725            .link("https://example.com")
726            .description("A test feed")
727            .atom_link("https://example.com/feed.xml")
728            .pub_date("Mon, 01 Jan 2024 00:00:00 GMT")
729            .generator("RSS Gen Test");
730
731        rss_data.add_item(
732            RssItem::new()
733                .title("Test Item")
734                .link("https://example.com/item1")
735                .description("A test item")
736                .guid("unique-id-1")
737                .pub_date("Mon, 01 Jan 2024 00:00:00 GMT"),
738        );
739
740        assert!(validate_rss_feed(&rss_data).is_ok());
741    }
742
743    #[test]
744    fn test_validate_rss_feed_convenience_function_invalid() {
745        let rss_data = RssData::new(Some(RssVersion::RSS2_0));
746        assert!(validate_rss_feed(&rss_data).is_err());
747    }
748}