rss_gen/
validator.rs

1// Copyright © 2024 RSS Gen. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4//! RSS feed validator module
5//!
6//! This module provides functionality to validate RSS feeds, ensuring they
7//! conform to the specified RSS version standards and contain valid data.
8
9use crate::data::{RssData, RssVersion};
10use crate::error::{Result, RssError, ValidationError};
11use dtt::datetime::DateTime;
12use url::Url;
13
/// Maximum allowed length for URL strings.
///
/// `validate_url` compares this limit against `str::len()`, so it is
/// measured in bytes of the UTF-8 encoded URL rather than in characters.
const MAX_URL_LENGTH: usize = 2000;
16
/// RSS feed validator for validating the structure and content of an RSS feed.
///
/// The validator only borrows the feed. Build one with
/// [`RssFeedValidator::new`] and run every check with
/// [`RssFeedValidator::validate`].
#[derive(Debug)]
pub struct RssFeedValidator<'a> {
    /// Borrowed feed data that the validation methods inspect.
    rss_data: &'a RssData,
}
22
23impl<'a> RssFeedValidator<'a> {
24    /// Creates a new `RssFeedValidator` instance with the provided `RssData`.
25    ///
26    /// # Arguments
27    ///
28    /// * `rss_data` - A reference to the `RssData` to be validated.
29    ///
30    /// # Returns
31    ///
32    /// A new instance of `RssFeedValidator`.
33    #[must_use]
34    pub fn new(rss_data: &'a RssData) -> Self {
35        RssFeedValidator { rss_data }
36    }
37
38    /// Validates the RSS feed structure and content.
39    ///
40    /// This method performs a comprehensive validation of the RSS feed,
41    /// including structure, items, dates, and version-specific requirements.
42    ///
43    /// # Returns
44    ///
45    /// * `Ok(())` if the validation passes.
46    /// * `Err(RssError::ValidationErrors)` containing a list of validation errors if any are found.
47    ///
48    /// # Errors
49    ///
50    /// This function returns an `Err(RssError::ValidationErrors)` if any validation checks fail.
51    pub fn validate(&self) -> Result<()> {
52        let mut errors = Vec::new();
53
54        self.validate_rss_data(&mut errors);
55        self.validate_structure(&mut errors);
56        self.validate_items(&mut errors);
57        self.validate_dates(&mut errors);
58        self.validate_version_specific(&mut errors);
59
60        if errors.is_empty() {
61            Ok(())
62        } else {
63            Err(RssError::ValidationErrors(
64                errors.into_iter().map(|e| e.to_string()).collect(),
65            ))
66        }
67    }
68
69    /// Validates the base `RssData` structure.
70    fn validate_rss_data(&self, errors: &mut Vec<ValidationError>) {
71        if let Err(e) = self.rss_data.validate() {
72            errors.push(ValidationError {
73                field: "rss_data".to_string(),
74                message: e.to_string(),
75            });
76        }
77    }
78
79    /// Validates the overall structure of the RSS feed.
80    fn validate_structure(&self, errors: &mut Vec<ValidationError>) {
81        Self::validate_url(&self.rss_data.link, "channel link", errors);
82
83        for (index, item) in self.rss_data.items.iter().enumerate() {
84            Self::validate_url(
85                &item.link,
86                &format!("item[{}] link", index),
87                errors,
88            );
89        }
90
91        if self.rss_data.items.is_empty() {
92            errors.push(ValidationError {
93                field: "items".to_string(),
94                message: "RSS feed must contain at least one item"
95                    .to_string(),
96            });
97        }
98
99        self.validate_guids(errors);
100        self.validate_atom_link(errors);
101    }
102
103    /// Validates that all GUIDs in the feed are unique.
104    fn validate_guids(&self, errors: &mut Vec<ValidationError>) {
105        let mut guids = std::collections::HashSet::new();
106        for item in &self.rss_data.items {
107            if !guids.insert(&item.guid) {
108                errors.push(ValidationError {
109                    field: "guid".to_string(),
110                    message: format!(
111                        "Duplicate GUID found: {}",
112                        item.guid
113                    ),
114                });
115            }
116        }
117    }
118
119    /// Validates the presence of atom:link for RSS 2.0 feeds.
120    fn validate_atom_link(&self, errors: &mut Vec<ValidationError>) {
121        if self.rss_data.version == RssVersion::RSS2_0
122            && self.rss_data.atom_link.is_empty()
123        {
124            errors.push(ValidationError {
125                field: "atom_link".to_string(),
126                message: "atom:link is required for RSS 2.0 feeds"
127                    .to_string(),
128            });
129        }
130    }
131
132    /// Validates individual items in the RSS feed.
133    fn validate_items(&self, errors: &mut Vec<ValidationError>) {
134        for (index, item) in self.rss_data.items.iter().enumerate() {
135            if let Err(e) = item.validate() {
136                errors.push(ValidationError {
137                    field: format!("item[{}]", index),
138                    message: format!("Item validation failed: {}", e),
139                });
140            }
141        }
142    }
143
144    /// Validates all dates in the RSS feed.
145    fn validate_dates(&self, errors: &mut Vec<ValidationError>) {
146        Self::validate_date(&self.rss_data.pub_date, "pubDate", errors);
147        Self::validate_date(
148            &self.rss_data.last_build_date,
149            "lastBuildDate",
150            errors,
151        );
152
153        for (index, item) in self.rss_data.items.iter().enumerate() {
154            Self::validate_date(
155                &item.pub_date,
156                &format!("item[{}].pubDate", index),
157                errors,
158            );
159        }
160    }
161
162    /// Validates a single date string.
163    fn validate_date(
164        date_str: &str,
165        field: &str,
166        errors: &mut Vec<ValidationError>,
167    ) {
168        if !date_str.is_empty() {
169            if let Err(e) = Self::parse_date(date_str) {
170                errors.push(ValidationError {
171                    field: field.to_string(),
172                    message: format!("Invalid date format: {}", e),
173                });
174            }
175        }
176    }
177
178    /// Parses a date string into a `DateTime` object.
179    ///
180    /// # Arguments
181    ///
182    /// * `date_str` - The date string to parse.
183    ///
184    /// # Returns
185    ///
186    /// A `Result` containing the parsed `DateTime` object or an error if the parsing fails.
187    ///
188    /// # Errors
189    ///
190    /// This function returns an `Err(RssError::DateParseError)` if the date format is invalid.
191    pub fn parse_date(date_str: &str) -> Result<DateTime> {
192        let rss_date_format = "[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second]";
193        let date_without_gmt =
194            date_str.strip_suffix(" GMT").ok_or_else(|| {
195                RssError::DateParseError(format!(
196                    "Invalid date format (missing GMT): {}",
197                    date_str
198                ))
199            })?;
200
201        let mut date = DateTime::parse_custom_format(
202            date_without_gmt,
203            rss_date_format,
204        )
205        .map_err(|_| {
206            RssError::DateParseError(format!(
207                "Failed to parse date: {}",
208                date_str
209            ))
210        })?;
211
212        date.offset = time::UtcOffset::UTC;
213        Ok(date)
214    }
215
216    /// Validates version-specific requirements of the RSS feed.
217    fn validate_version_specific(
218        &self,
219        errors: &mut Vec<ValidationError>,
220    ) {
221        match self.rss_data.version {
222            RssVersion::RSS2_0 => {
223                if self.rss_data.generator.is_empty() {
224                    errors.push(ValidationError {
225                        field: "generator".to_string(),
226                        message:
227                            "generator is recommended for RSS 2.0 feeds"
228                                .to_string(),
229                    });
230                }
231                if self.rss_data.atom_link.is_empty() {
232                    errors.push(ValidationError {
233                        field: "atom_link".to_string(),
234                        message:
235                            "atom:link is required for RSS 2.0 feeds"
236                                .to_string(),
237                    });
238                }
239            }
240            RssVersion::RSS1_0 => {
241                if self
242                    .rss_data
243                    .items
244                    .iter()
245                    .any(|item| item.guid.is_empty())
246                {
247                    errors.push(ValidationError {
248                        field: "guid".to_string(),
249                        message:
250                            "All items must have a guid in RSS 1.0"
251                                .to_string(),
252                    });
253                }
254            }
255            _ => {}
256        }
257    }
258
259    /// Validates a URL string.
260    ///
261    /// # Arguments
262    ///
263    /// * `url` - The URL string to validate.
264    /// * `field` - The field name for error reporting.
265    /// * `errors` - A mutable vector to collect validation errors.
266    fn validate_url(
267        url: &str,
268        field: &str,
269        errors: &mut Vec<ValidationError>,
270    ) {
271        if url.len() > MAX_URL_LENGTH {
272            errors.push(ValidationError {
273                field: field.to_string(),
274                message: format!(
275                    "URL exceeds maximum length of {} characters",
276                    MAX_URL_LENGTH
277                ),
278            });
279            return;
280        }
281
282        match Url::parse(url) {
283            Ok(parsed_url) => {
284                if parsed_url.scheme() != "http"
285                    && parsed_url.scheme() != "https"
286                {
287                    errors.push(ValidationError {
288                        field: field.to_string(),
289                        message: format!("Invalid URL scheme in {}: {}. Only HTTP and HTTPS are allowed.", field, url),
290                    });
291                }
292            }
293            Err(_) => {
294                errors.push(ValidationError {
295                    field: field.to_string(),
296                    message: format!(
297                        "Invalid URL in {}: {}",
298                        field, url
299                    ),
300                });
301            }
302        }
303    }
304}
305
306/// Validates the provided `RssData` and returns a `Result` indicating success or failure.
307///
308/// # Arguments
309///
310/// * `rss_data` - A reference to the `RssData` to be validated.
311///
312/// # Returns
313///
314/// * `Ok(())` if the validation passes.
315/// * `Err(RssError::ValidationErrors)` containing a list of validation errors if any are found.
316///
317/// # Errors
318///
319/// This function returns an `Err(RssError::ValidationErrors)` if any validation checks fail.
320pub fn validate_rss_feed(rss_data: &RssData) -> Result<()> {
321    let validator = RssFeedValidator::new(rss_data);
322    validator.validate()
323}
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328    use crate::data::RssItem;
329
330    #[test]
331    fn test_valid_rss_feed() {
332        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
333            .title("Test Feed")
334            .link("https://example.com")
335            .description("A test feed")
336            .atom_link("https://example.com/feed.xml")
337            .pub_date("Mon, 01 Jan 2024 00:00:00 GMT")
338            .generator("RSS Gen Test");
339
340        rss_data.add_item(
341            RssItem::new()
342                .title("Test Item")
343                .link("https://example.com/item1")
344                .description("A test item")
345                .guid("unique-id-1")
346                .pub_date("Mon, 01 Jan 2024 00:00:00 GMT"),
347        );
348
349        let validator = RssFeedValidator::new(&rss_data);
350        assert!(validator.validate().is_ok());
351    }
352
353    #[test]
354    fn test_invalid_rss_feed() {
355        let rss_data = RssData::new(Some(RssVersion::RSS2_0))
356            .title("Test Feed")
357            .link("https://example.com")
358            .description("A test feed")
359            .pub_date("Invalid Date");
360
361        let validator = RssFeedValidator::new(&rss_data);
362        let result = validator.validate();
363        assert!(result.is_err());
364        if let Err(RssError::ValidationErrors(errors)) = result {
365            assert!(errors
366                .iter()
367                .any(|e| e.contains("atom:link is required")));
368            assert!(errors.iter().any(|e| e
369                .contains("RSS feed must contain at least one item")));
370            assert!(errors
371                .iter()
372                .any(|e| e.contains("Invalid date format")));
373        } else {
374            panic!("Expected ValidationErrors");
375        }
376    }
377
378    #[test]
379    fn test_validate_url_valid() {
380        let rss_data = RssData::new(None);
381        let mut errors = Vec::new();
382
383        RssFeedValidator::validate_url(
384            "https://example.com",
385            "test",
386            &mut errors,
387        );
388        RssFeedValidator::validate_url(
389            "http://example.com",
390            "test",
391            &mut errors,
392        );
393        RssFeedValidator::validate_url(
394            "https://sub.example.com/path?query=value",
395            "test",
396            &mut errors,
397        );
398
399        assert!(errors.is_empty());
400        assert!(rss_data.link.is_empty());
401    }
402
403    #[test]
404    fn test_validate_url_invalid() {
405        let mut errors = Vec::new();
406
407        RssFeedValidator::validate_url(
408            "not a url",
409            "test",
410            &mut errors,
411        );
412        RssFeedValidator::validate_url(
413            "ftp://example.com",
414            "test",
415            &mut errors,
416        );
417        RssFeedValidator::validate_url("http://", "test", &mut errors);
418        RssFeedValidator::validate_url("https://", "test", &mut errors);
419        RssFeedValidator::validate_url(
420            "file:///path/to/file",
421            "test",
422            &mut errors,
423        );
424
425        assert_eq!(errors.len(), 5);
426    }
427
428    #[test]
429    fn test_validate_structure_with_urls() {
430        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
431            .title("Test Feed")
432            .link("https://example.com")
433            .description("A test feed")
434            .atom_link("https://example.com/feed.xml");
435
436        rss_data.add_item(
437            RssItem::new()
438                .title("Test Item")
439                .link("https://example.com/item1")
440                .description("A test item")
441                .guid("unique-id-1"),
442        );
443
444        let validator = RssFeedValidator::new(&rss_data);
445        let mut errors = Vec::new();
446        validator.validate_structure(&mut errors);
447        assert!(errors.is_empty());
448
449        // Test with invalid URL
450        rss_data.link = "not a url".to_string();
451        let validator = RssFeedValidator::new(&rss_data);
452        let mut errors = Vec::new();
453        validator.validate_structure(&mut errors);
454        assert!(errors
455            .iter()
456            .any(|e| e.message.contains("Invalid URL")));
457    }
458
459    #[test]
460    fn test_validate_version_specific_rss2_0() {
461        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
462            .title("Test Feed")
463            .link("https://example.com")
464            .description("A test feed")
465            .atom_link("https://example.com/feed.xml")
466            .generator("RSS Gen Test");
467
468        let validator = RssFeedValidator::new(&rss_data);
469        let mut errors = Vec::new();
470        validator.validate_version_specific(&mut errors);
471        assert!(errors.is_empty());
472
473        // Test without generator
474        rss_data.generator = String::new();
475        let validator = RssFeedValidator::new(&rss_data);
476        let mut errors = Vec::new();
477        validator.validate_version_specific(&mut errors);
478        assert!(errors
479            .iter()
480            .any(|e| e.message.contains("generator is recommended")));
481
482        // Test without atom:link
483        rss_data.atom_link = String::new();
484        let validator = RssFeedValidator::new(&rss_data);
485        let mut errors = Vec::new();
486        validator.validate_version_specific(&mut errors);
487        assert!(errors
488            .iter()
489            .any(|e| e.message.contains("atom:link is required")));
490    }
491
492    #[test]
493    fn test_validate_version_specific_rss1_0() {
494        let mut rss_data = RssData::new(Some(RssVersion::RSS1_0))
495            .title("Test Feed")
496            .link("https://example.com")
497            .description("A test feed");
498
499        rss_data.add_item(
500            RssItem::new()
501                .title("Test Item")
502                .link("https://example.com/item1")
503                .description("A test item")
504                .guid("unique-id-1"),
505        );
506
507        let validator = RssFeedValidator::new(&rss_data);
508        let mut errors = Vec::new();
509        validator.validate_version_specific(&mut errors);
510        assert!(errors.is_empty());
511
512        // Test without guid
513        rss_data.items[0].guid = String::new();
514        let validator = RssFeedValidator::new(&rss_data);
515        let mut errors = Vec::new();
516        validator.validate_version_specific(&mut errors);
517        assert!(errors.iter().any(|e| e
518            .message
519            .contains("All items must have a guid in RSS 1.0")));
520    }
521
522    #[test]
523    fn test_validate_version_specific_older_versions() {
524        for version in &[
525            RssVersion::RSS0_90,
526            RssVersion::RSS0_91,
527            RssVersion::RSS0_92,
528        ] {
529            let rss_data = RssData::new(Some(*version))
530                .title("Test Feed")
531                .link("https://example.com")
532                .description("A test feed");
533
534            let validator = RssFeedValidator::new(&rss_data);
535            let mut errors = Vec::new();
536            validator.validate_version_specific(&mut errors);
537            assert!(
538                errors.is_empty(),
539                "Unexpected errors for version {:?}",
540                version
541            );
542        }
543    }
544
545    #[test]
546    fn test_parse_date_valid() {
547        let valid_date = "Mon, 01 Jan 2024 00:00:00 GMT";
548        assert!(RssFeedValidator::parse_date(valid_date).is_ok());
549    }
550
551    #[test]
552    fn test_parse_date_invalid() {
553        let invalid_date = "Invalid Date";
554        assert!(RssFeedValidator::parse_date(invalid_date).is_err());
555    }
556
557    #[test]
558    fn test_validate_guids() {
559        let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
560            .title("Test Feed")
561            .link("https://example.com")
562            .description("A test feed");
563
564        rss_data.add_item(RssItem::new().guid("guid1"));
565        rss_data.add_item(RssItem::new().guid("guid2"));
566        rss_data.add_item(RssItem::new().guid("guid1")); // Duplicate
567
568        let validator = RssFeedValidator::new(&rss_data);
569        let mut errors = Vec::new();
570        validator.validate_guids(&mut errors);
571
572        assert_eq!(errors.len(), 1);
573        assert!(errors[0]
574            .message
575            .contains("Duplicate GUID found: guid1"));
576    }
577
578    #[test]
579    fn test_validate_atom_link() {
580        let rss_data = RssData::new(Some(RssVersion::RSS2_0))
581            .title("Test Feed")
582            .link("https://example.com")
583            .description("A test feed");
584
585        let validator = RssFeedValidator::new(&rss_data);
586        let mut errors = Vec::new();
587        validator.validate_atom_link(&mut errors);
588
589        assert_eq!(errors.len(), 1);
590        assert!(errors[0]
591            .message
592            .contains("atom:link is required for RSS 2.0 feeds"));
593
594        let rss_data_with_atom =
595            rss_data.atom_link("https://example.com/feed.xml");
596        let validator = RssFeedValidator::new(&rss_data_with_atom);
597        let mut errors = Vec::new();
598        validator.validate_atom_link(&mut errors);
599
600        assert!(errors.is_empty());
601    }
602
603    #[test]
604    fn test_validate_rss_data() {
605        let invalid_rss_data = RssData::new(Some(RssVersion::RSS2_0)); // Missing required fields
606
607        let validator = RssFeedValidator::new(&invalid_rss_data);
608        let mut errors = Vec::new();
609        validator.validate_rss_data(&mut errors);
610
611        assert!(!errors.is_empty());
612        assert!(errors[0].message.contains("Title is missing"));
613    }
614}