1use crate::data::{RssData, RssVersion};
10use crate::error::{Result, RssError, ValidationError};
11use dtt::datetime::DateTime;
12use url::Url;
13
14const MAX_URL_LENGTH: usize = 2000;
16
/// Validator that runs structural and content checks over a borrowed
/// [`RssData`] feed.
///
/// The feed is held by shared reference for the lifetime `'a`, so the
/// validator only reads it.
#[derive(Debug)]
pub struct RssFeedValidator<'a> {
    /// The feed being validated.
    rss_data: &'a RssData,
}
22
23impl<'a> RssFeedValidator<'a> {
24 #[must_use]
34 pub fn new(rss_data: &'a RssData) -> Self {
35 RssFeedValidator { rss_data }
36 }
37
38 pub fn validate(&self) -> Result<()> {
52 let mut errors = Vec::new();
53
54 self.validate_rss_data(&mut errors);
55 self.validate_structure(&mut errors);
56 self.validate_items(&mut errors);
57 self.validate_dates(&mut errors);
58 self.validate_version_specific(&mut errors);
59
60 if errors.is_empty() {
61 Ok(())
62 } else {
63 Err(RssError::ValidationErrors(
64 errors.into_iter().map(|e| e.to_string()).collect(),
65 ))
66 }
67 }
68
69 fn validate_rss_data(&self, errors: &mut Vec<ValidationError>) {
71 if let Err(e) = self.rss_data.validate() {
72 errors.push(ValidationError {
73 field: "rss_data".to_string(),
74 message: e.to_string(),
75 });
76 }
77 }
78
79 fn validate_structure(&self, errors: &mut Vec<ValidationError>) {
81 Self::validate_url(&self.rss_data.link, "channel link", errors);
82
83 for (index, item) in self.rss_data.items.iter().enumerate() {
84 Self::validate_url(
85 &item.link,
86 &format!("item[{}] link", index),
87 errors,
88 );
89 }
90
91 if self.rss_data.items.is_empty() {
92 errors.push(ValidationError {
93 field: "items".to_string(),
94 message: "RSS feed must contain at least one item"
95 .to_string(),
96 });
97 }
98
99 self.validate_guids(errors);
100 self.validate_atom_link(errors);
101 }
102
103 fn validate_guids(&self, errors: &mut Vec<ValidationError>) {
105 let mut guids = std::collections::HashSet::new();
106 for item in &self.rss_data.items {
107 if !guids.insert(&item.guid) {
108 errors.push(ValidationError {
109 field: "guid".to_string(),
110 message: format!(
111 "Duplicate GUID found: {}",
112 item.guid
113 ),
114 });
115 }
116 }
117 }
118
119 fn validate_atom_link(&self, errors: &mut Vec<ValidationError>) {
121 if self.rss_data.version == RssVersion::RSS2_0
122 && self.rss_data.atom_link.is_empty()
123 {
124 errors.push(ValidationError {
125 field: "atom_link".to_string(),
126 message: "atom:link is required for RSS 2.0 feeds"
127 .to_string(),
128 });
129 }
130 }
131
132 fn validate_items(&self, errors: &mut Vec<ValidationError>) {
134 for (index, item) in self.rss_data.items.iter().enumerate() {
135 if let Err(e) = item.validate() {
136 errors.push(ValidationError {
137 field: format!("item[{}]", index),
138 message: format!("Item validation failed: {}", e),
139 });
140 }
141 }
142 }
143
144 fn validate_dates(&self, errors: &mut Vec<ValidationError>) {
146 Self::validate_date(&self.rss_data.pub_date, "pubDate", errors);
147 Self::validate_date(
148 &self.rss_data.last_build_date,
149 "lastBuildDate",
150 errors,
151 );
152
153 for (index, item) in self.rss_data.items.iter().enumerate() {
154 Self::validate_date(
155 &item.pub_date,
156 &format!("item[{}].pubDate", index),
157 errors,
158 );
159 }
160 }
161
162 fn validate_date(
164 date_str: &str,
165 field: &str,
166 errors: &mut Vec<ValidationError>,
167 ) {
168 if !date_str.is_empty() {
169 if let Err(e) = Self::parse_date(date_str) {
170 errors.push(ValidationError {
171 field: field.to_string(),
172 message: format!("Invalid date format: {}", e),
173 });
174 }
175 }
176 }
177
178 pub fn parse_date(date_str: &str) -> Result<DateTime> {
192 let rss_date_format = "[weekday repr:short], [day] [month repr:short] [year] [hour]:[minute]:[second]";
193 let date_without_gmt =
194 date_str.strip_suffix(" GMT").ok_or_else(|| {
195 RssError::DateParseError(format!(
196 "Invalid date format (missing GMT): {}",
197 date_str
198 ))
199 })?;
200
201 let mut date = DateTime::parse_custom_format(
202 date_without_gmt,
203 rss_date_format,
204 )
205 .map_err(|_| {
206 RssError::DateParseError(format!(
207 "Failed to parse date: {}",
208 date_str
209 ))
210 })?;
211
212 date.offset = time::UtcOffset::UTC;
213 Ok(date)
214 }
215
216 fn validate_version_specific(
218 &self,
219 errors: &mut Vec<ValidationError>,
220 ) {
221 match self.rss_data.version {
222 RssVersion::RSS2_0 => {
223 if self.rss_data.generator.is_empty() {
224 errors.push(ValidationError {
225 field: "generator".to_string(),
226 message:
227 "generator is recommended for RSS 2.0 feeds"
228 .to_string(),
229 });
230 }
231 if self.rss_data.atom_link.is_empty() {
232 errors.push(ValidationError {
233 field: "atom_link".to_string(),
234 message:
235 "atom:link is required for RSS 2.0 feeds"
236 .to_string(),
237 });
238 }
239 }
240 RssVersion::RSS1_0 => {
241 if self
242 .rss_data
243 .items
244 .iter()
245 .any(|item| item.guid.is_empty())
246 {
247 errors.push(ValidationError {
248 field: "guid".to_string(),
249 message:
250 "All items must have a guid in RSS 1.0"
251 .to_string(),
252 });
253 }
254 }
255 _ => {}
256 }
257 }
258
259 fn validate_url(
267 url: &str,
268 field: &str,
269 errors: &mut Vec<ValidationError>,
270 ) {
271 if url.len() > MAX_URL_LENGTH {
272 errors.push(ValidationError {
273 field: field.to_string(),
274 message: format!(
275 "URL exceeds maximum length of {} characters",
276 MAX_URL_LENGTH
277 ),
278 });
279 return;
280 }
281
282 match Url::parse(url) {
283 Ok(parsed_url) => {
284 if parsed_url.scheme() != "http"
285 && parsed_url.scheme() != "https"
286 {
287 errors.push(ValidationError {
288 field: field.to_string(),
289 message: format!("Invalid URL scheme in {}: {}. Only HTTP and HTTPS are allowed.", field, url),
290 });
291 }
292 }
293 Err(_) => {
294 errors.push(ValidationError {
295 field: field.to_string(),
296 message: format!(
297 "Invalid URL in {}: {}",
298 field, url
299 ),
300 });
301 }
302 }
303 }
304}
305
306pub fn validate_rss_feed(rss_data: &RssData) -> Result<()> {
321 let validator = RssFeedValidator::new(rss_data);
322 validator.validate()
323}
324
325#[cfg(test)]
326mod tests {
327 use super::*;
328 use crate::data::RssItem;
329
330 #[test]
331 fn test_valid_rss_feed() {
332 let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
333 .title("Test Feed")
334 .link("https://example.com")
335 .description("A test feed")
336 .atom_link("https://example.com/feed.xml")
337 .pub_date("Mon, 01 Jan 2024 00:00:00 GMT")
338 .generator("RSS Gen Test");
339
340 rss_data.add_item(
341 RssItem::new()
342 .title("Test Item")
343 .link("https://example.com/item1")
344 .description("A test item")
345 .guid("unique-id-1")
346 .pub_date("Mon, 01 Jan 2024 00:00:00 GMT"),
347 );
348
349 let validator = RssFeedValidator::new(&rss_data);
350 assert!(validator.validate().is_ok());
351 }
352
353 #[test]
354 fn test_invalid_rss_feed() {
355 let rss_data = RssData::new(Some(RssVersion::RSS2_0))
356 .title("Test Feed")
357 .link("https://example.com")
358 .description("A test feed")
359 .pub_date("Invalid Date");
360
361 let validator = RssFeedValidator::new(&rss_data);
362 let result = validator.validate();
363 assert!(result.is_err());
364 if let Err(RssError::ValidationErrors(errors)) = result {
365 assert!(errors
366 .iter()
367 .any(|e| e.contains("atom:link is required")));
368 assert!(errors.iter().any(|e| e
369 .contains("RSS feed must contain at least one item")));
370 assert!(errors
371 .iter()
372 .any(|e| e.contains("Invalid date format")));
373 } else {
374 panic!("Expected ValidationErrors");
375 }
376 }
377
378 #[test]
379 fn test_validate_url_valid() {
380 let rss_data = RssData::new(None);
381 let mut errors = Vec::new();
382
383 RssFeedValidator::validate_url(
384 "https://example.com",
385 "test",
386 &mut errors,
387 );
388 RssFeedValidator::validate_url(
389 "http://example.com",
390 "test",
391 &mut errors,
392 );
393 RssFeedValidator::validate_url(
394 "https://sub.example.com/path?query=value",
395 "test",
396 &mut errors,
397 );
398
399 assert!(errors.is_empty());
400 assert!(rss_data.link.is_empty());
401 }
402
403 #[test]
404 fn test_validate_url_invalid() {
405 let mut errors = Vec::new();
406
407 RssFeedValidator::validate_url(
408 "not a url",
409 "test",
410 &mut errors,
411 );
412 RssFeedValidator::validate_url(
413 "ftp://example.com",
414 "test",
415 &mut errors,
416 );
417 RssFeedValidator::validate_url("http://", "test", &mut errors);
418 RssFeedValidator::validate_url("https://", "test", &mut errors);
419 RssFeedValidator::validate_url(
420 "file:///path/to/file",
421 "test",
422 &mut errors,
423 );
424
425 assert_eq!(errors.len(), 5);
426 }
427
428 #[test]
429 fn test_validate_structure_with_urls() {
430 let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
431 .title("Test Feed")
432 .link("https://example.com")
433 .description("A test feed")
434 .atom_link("https://example.com/feed.xml");
435
436 rss_data.add_item(
437 RssItem::new()
438 .title("Test Item")
439 .link("https://example.com/item1")
440 .description("A test item")
441 .guid("unique-id-1"),
442 );
443
444 let validator = RssFeedValidator::new(&rss_data);
445 let mut errors = Vec::new();
446 validator.validate_structure(&mut errors);
447 assert!(errors.is_empty());
448
449 rss_data.link = "not a url".to_string();
451 let validator = RssFeedValidator::new(&rss_data);
452 let mut errors = Vec::new();
453 validator.validate_structure(&mut errors);
454 assert!(errors
455 .iter()
456 .any(|e| e.message.contains("Invalid URL")));
457 }
458
459 #[test]
460 fn test_validate_version_specific_rss2_0() {
461 let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
462 .title("Test Feed")
463 .link("https://example.com")
464 .description("A test feed")
465 .atom_link("https://example.com/feed.xml")
466 .generator("RSS Gen Test");
467
468 let validator = RssFeedValidator::new(&rss_data);
469 let mut errors = Vec::new();
470 validator.validate_version_specific(&mut errors);
471 assert!(errors.is_empty());
472
473 rss_data.generator = String::new();
475 let validator = RssFeedValidator::new(&rss_data);
476 let mut errors = Vec::new();
477 validator.validate_version_specific(&mut errors);
478 assert!(errors
479 .iter()
480 .any(|e| e.message.contains("generator is recommended")));
481
482 rss_data.atom_link = String::new();
484 let validator = RssFeedValidator::new(&rss_data);
485 let mut errors = Vec::new();
486 validator.validate_version_specific(&mut errors);
487 assert!(errors
488 .iter()
489 .any(|e| e.message.contains("atom:link is required")));
490 }
491
492 #[test]
493 fn test_validate_version_specific_rss1_0() {
494 let mut rss_data = RssData::new(Some(RssVersion::RSS1_0))
495 .title("Test Feed")
496 .link("https://example.com")
497 .description("A test feed");
498
499 rss_data.add_item(
500 RssItem::new()
501 .title("Test Item")
502 .link("https://example.com/item1")
503 .description("A test item")
504 .guid("unique-id-1"),
505 );
506
507 let validator = RssFeedValidator::new(&rss_data);
508 let mut errors = Vec::new();
509 validator.validate_version_specific(&mut errors);
510 assert!(errors.is_empty());
511
512 rss_data.items[0].guid = String::new();
514 let validator = RssFeedValidator::new(&rss_data);
515 let mut errors = Vec::new();
516 validator.validate_version_specific(&mut errors);
517 assert!(errors.iter().any(|e| e
518 .message
519 .contains("All items must have a guid in RSS 1.0")));
520 }
521
522 #[test]
523 fn test_validate_version_specific_older_versions() {
524 for version in &[
525 RssVersion::RSS0_90,
526 RssVersion::RSS0_91,
527 RssVersion::RSS0_92,
528 ] {
529 let rss_data = RssData::new(Some(*version))
530 .title("Test Feed")
531 .link("https://example.com")
532 .description("A test feed");
533
534 let validator = RssFeedValidator::new(&rss_data);
535 let mut errors = Vec::new();
536 validator.validate_version_specific(&mut errors);
537 assert!(
538 errors.is_empty(),
539 "Unexpected errors for version {:?}",
540 version
541 );
542 }
543 }
544
545 #[test]
546 fn test_parse_date_valid() {
547 let valid_date = "Mon, 01 Jan 2024 00:00:00 GMT";
548 assert!(RssFeedValidator::parse_date(valid_date).is_ok());
549 }
550
551 #[test]
552 fn test_parse_date_invalid() {
553 let invalid_date = "Invalid Date";
554 assert!(RssFeedValidator::parse_date(invalid_date).is_err());
555 }
556
557 #[test]
558 fn test_validate_guids() {
559 let mut rss_data = RssData::new(Some(RssVersion::RSS2_0))
560 .title("Test Feed")
561 .link("https://example.com")
562 .description("A test feed");
563
564 rss_data.add_item(RssItem::new().guid("guid1"));
565 rss_data.add_item(RssItem::new().guid("guid2"));
566 rss_data.add_item(RssItem::new().guid("guid1")); let validator = RssFeedValidator::new(&rss_data);
569 let mut errors = Vec::new();
570 validator.validate_guids(&mut errors);
571
572 assert_eq!(errors.len(), 1);
573 assert!(errors[0]
574 .message
575 .contains("Duplicate GUID found: guid1"));
576 }
577
578 #[test]
579 fn test_validate_atom_link() {
580 let rss_data = RssData::new(Some(RssVersion::RSS2_0))
581 .title("Test Feed")
582 .link("https://example.com")
583 .description("A test feed");
584
585 let validator = RssFeedValidator::new(&rss_data);
586 let mut errors = Vec::new();
587 validator.validate_atom_link(&mut errors);
588
589 assert_eq!(errors.len(), 1);
590 assert!(errors[0]
591 .message
592 .contains("atom:link is required for RSS 2.0 feeds"));
593
594 let rss_data_with_atom =
595 rss_data.atom_link("https://example.com/feed.xml");
596 let validator = RssFeedValidator::new(&rss_data_with_atom);
597 let mut errors = Vec::new();
598 validator.validate_atom_link(&mut errors);
599
600 assert!(errors.is_empty());
601 }
602
603 #[test]
604 fn test_validate_rss_data() {
605 let invalid_rss_data = RssData::new(Some(RssVersion::RSS2_0)); let validator = RssFeedValidator::new(&invalid_rss_data);
608 let mut errors = Vec::new();
609 validator.validate_rss_data(&mut errors);
610
611 assert!(!errors.is_empty());
612 assert!(errors[0].message.contains("Title is missing"));
613 }
614}