feedparser_rs/util/
base_url.rs1use std::net::IpAddr;
7use url::Url;
8
9#[must_use]
41pub fn is_safe_url(url: &str) -> bool {
42 let Ok(parsed) = Url::parse(url) else {
43 return false;
44 };
45
46 match parsed.scheme() {
48 "http" | "https" => {}
49 _ => return false,
50 }
51
52 if let Some(host) = parsed.host() {
54 match host {
55 url::Host::Domain(domain) => {
56 if domain == "localhost" {
58 return false;
59 }
60
61 if domain == "metadata.google.internal" {
63 return false;
64 }
65 }
66 url::Host::Ipv4(ipv4) => {
67 let ip = IpAddr::V4(ipv4);
68 if ip.is_loopback() || is_private_ip(&ip) {
70 return false;
71 }
72
73 let octets = ipv4.octets();
75 if octets == [169, 254, 169, 254] {
76 return false;
77 }
78 }
79 url::Host::Ipv6(ipv6) => {
80 let ip = IpAddr::V6(ipv6);
81 if ip.is_loopback() || is_private_ip(&ip) {
83 return false;
84 }
85 }
86 }
87 }
88
89 true
90}
91
/// Reports whether `ip` lies in a range that should never be reachable from
/// a feed-supplied URL.
///
/// Covered IPv4 ranges: `0.0.0.0/8`, `10.0.0.0/8`, `127.0.0.0/8`,
/// `169.254.0.0/16`, `172.16.0.0/12` and `192.168.0.0/16`.
///
/// Covered IPv6 addresses: `::`, `::1`, `fc00::/7` (unique local),
/// `fe80::/10` (link-local), and any IPv4-mapped address
/// (`::ffff:a.b.c.d`) whose embedded IPv4 address is in one of the ranges
/// above.
fn is_private_ip(ip: &IpAddr) -> bool {
    match ip {
        IpAddr::V4(ipv4) => is_internal_ipv4(ipv4.octets()),
        IpAddr::V6(ipv6) => {
            // `::ffff:127.0.0.1` reaches the same endpoint as `127.0.0.1`,
            // so judge a mapped address by the IPv4 address it carries.
            if let Some(embedded) = ipv6.to_ipv4_mapped() {
                return is_internal_ipv4(embedded.octets());
            }

            let first_segment = ipv6.segments()[0];
            ipv6.is_loopback()
                || ipv6.is_unspecified()
                || (first_segment & 0xfe00) == 0xfc00
                || (first_segment & 0xffc0) == 0xfe80
        }
    }
}

/// Returns `true` when the IPv4 address given by `octets` is in the "this
/// network", private, loopback, or link-local ranges.
fn is_internal_ipv4(octets: [u8; 4]) -> bool {
    match octets {
        [0 | 10 | 127, ..] | [169, 254, ..] | [192, 168, ..] => true,
        [172, second, ..] => (16..=31).contains(&second),
        _ => false,
    }
}
107
108#[must_use]
144pub fn resolve_url(href: &str, base: Option<&str>) -> String {
145 if href.starts_with("http://")
147 || href.starts_with("https://")
148 || href.starts_with("mailto:")
149 || href.starts_with("tel:")
150 {
151 return href.to_string();
152 }
153
154 let Some(base_str) = base else {
156 return href.to_string();
157 };
158
159 let Ok(base_url) = Url::parse(base_str) else {
161 return href.to_string();
162 };
163
164 base_url
166 .join(href)
167 .map_or_else(|_| href.to_string(), |resolved| resolved.to_string())
168}
169
170#[must_use]
211pub fn combine_bases(parent_base: Option<&str>, child_base: Option<&str>) -> Option<String> {
212 match (parent_base, child_base) {
213 (_, Some(child)) => {
214 Some(resolve_url(child, parent_base))
216 }
217 (Some(parent), None) => Some(parent.to_string()),
218 (None, None) => None,
219 }
220}
221
222#[derive(Debug, Clone, Default)]
227pub struct BaseUrlContext {
228 base: Option<String>,
230}
231
232impl BaseUrlContext {
233 #[must_use]
235 pub const fn new() -> Self {
236 Self { base: None }
237 }
238
239 #[must_use]
241 pub fn with_base(base: impl Into<String>) -> Self {
242 Self {
243 base: Some(base.into()),
244 }
245 }
246
247 #[must_use]
249 pub fn base(&self) -> Option<&str> {
250 self.base.as_deref()
251 }
252
253 pub fn update_base(&mut self, xml_base: &str) {
257 let new_base = resolve_url(xml_base, self.base.as_deref());
258 self.base = Some(new_base);
259 }
260
261 #[must_use]
263 pub fn resolve(&self, href: &str) -> String {
264 resolve_url(href, self.base.as_deref())
265 }
266
267 #[must_use]
300 pub fn resolve_safe(&self, href: &str) -> String {
301 let resolved = self.resolve(href);
302
303 let resolved_lower = resolved.to_lowercase();
305
306 if resolved_lower.starts_with("file://")
309 || resolved_lower.starts_with("data:")
310 || resolved_lower.starts_with("javascript:")
311 || resolved_lower.starts_with("ftp://")
312 || resolved_lower.starts_with("gopher://")
313 {
314 return href.to_string();
316 }
317
318 if resolved_lower.starts_with("http://") || resolved_lower.starts_with("https://") {
320 if is_safe_url(&resolved) {
321 resolved
322 } else {
323 let href_is_unsafe_absolute = Url::parse(href).is_ok_and(|parsed_href| {
327 let is_http_scheme = matches!(parsed_href.scheme(), "http" | "https");
328 is_http_scheme && !is_safe_url(href)
329 });
330
331 if href_is_unsafe_absolute {
332 String::new()
333 } else {
334 href.to_string()
335 }
336 }
337 } else {
338 resolved
340 }
341 }
342
343 #[must_use]
345 pub fn child(&self) -> Self {
346 Self {
347 base: self.base.clone(),
348 }
349 }
350
351 #[must_use]
353 pub fn child_with_base(&self, xml_base: &str) -> Self {
354 let new_base = combine_bases(self.base.as_deref(), Some(xml_base));
355 Self { base: new_base }
356 }
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `resolve_url(href, base)` for every `(href, expected)` pair.
    fn check_resolution(base: Option<&str>, cases: &[(&str, &str)]) {
        for &(href, expected) in cases {
            assert_eq!(
                resolve_url(href, base),
                expected,
                "href={:?}, base={:?}",
                href,
                base
            );
        }
    }

    /// Checks that `is_safe_url` rejects every URL in `urls`.
    fn check_rejected(urls: &[&str]) {
        for &url in urls {
            assert!(!is_safe_url(url), "{:?} should be rejected", url);
        }
    }

    /// Checks that `is_safe_url` accepts every URL in `urls`.
    fn check_accepted(urls: &[&str]) {
        for &url in urls {
            assert!(is_safe_url(url), "{:?} should be accepted", url);
        }
    }

    // ---- resolve_url ----

    #[test]
    fn test_resolve_absolute_url() {
        check_resolution(
            Some("http://other.com/"),
            &[
                ("http://example.com/page", "http://example.com/page"),
                ("https://example.com/page", "https://example.com/page"),
            ],
        );
    }

    #[test]
    fn test_resolve_relative_url() {
        check_resolution(
            Some("http://example.com/dir/"),
            &[
                ("page.html", "http://example.com/dir/page.html"),
                ("/absolute/path", "http://example.com/absolute/path"),
            ],
        );
        check_resolution(
            Some("http://example.com/dir/sub/"),
            &[("../sibling/page", "http://example.com/dir/sibling/page")],
        );
    }

    #[test]
    fn test_resolve_without_base() {
        check_resolution(
            None,
            &[
                ("page.html", "page.html"),
                ("http://example.com", "http://example.com"),
            ],
        );
    }

    #[test]
    fn test_resolve_invalid_base() {
        check_resolution(Some("not a valid url"), &[("page.html", "page.html")]);
    }

    #[test]
    fn test_resolve_special_schemes() {
        check_resolution(
            Some("http://example.com/"),
            &[
                ("mailto:test@example.com", "mailto:test@example.com"),
                ("tel:+1234567890", "tel:+1234567890"),
            ],
        );
    }

    // ---- combine_bases ----

    #[test]
    fn test_combine_bases_child_absolute() {
        let combined = combine_bases(Some("http://parent.com/"), Some("http://child.com/"));
        assert_eq!(combined.as_deref(), Some("http://child.com/"));
    }

    #[test]
    fn test_combine_bases_child_relative() {
        let combined = combine_bases(Some("http://example.com/feed/"), Some("items/"));
        assert_eq!(combined.as_deref(), Some("http://example.com/feed/items/"));
    }

    #[test]
    fn test_combine_bases_no_child() {
        let combined = combine_bases(Some("http://example.com/"), None);
        assert_eq!(combined.as_deref(), Some("http://example.com/"));
    }

    #[test]
    fn test_combine_bases_no_parent() {
        let combined = combine_bases(None, Some("http://example.com/"));
        assert_eq!(combined.as_deref(), Some("http://example.com/"));
    }

    #[test]
    fn test_combine_bases_none() {
        assert!(combine_bases(None, None).is_none());
    }

    // ---- BaseUrlContext ----

    #[test]
    fn test_context_new() {
        assert_eq!(BaseUrlContext::new().base(), None);
    }

    #[test]
    fn test_context_with_base() {
        let context = BaseUrlContext::with_base("http://example.com/");
        assert_eq!(context.base(), Some("http://example.com/"));
    }

    #[test]
    fn test_context_update_base() {
        let mut context = BaseUrlContext::with_base("http://example.com/feed/");
        context.update_base("items/");
        assert_eq!(context.base(), Some("http://example.com/feed/items/"));
    }

    #[test]
    fn test_context_resolve() {
        let context = BaseUrlContext::with_base("http://example.com/feed/");
        let expectations = [
            ("item.html", "http://example.com/feed/item.html"),
            ("http://other.com/", "http://other.com/"),
        ];
        for &(href, expected) in expectations.iter() {
            assert_eq!(context.resolve(href), expected);
        }
    }

    #[test]
    fn test_context_child() {
        let inherited = BaseUrlContext::with_base("http://example.com/").child();
        assert_eq!(inherited.base(), Some("http://example.com/"));
    }

    #[test]
    fn test_context_child_with_base() {
        let nested =
            BaseUrlContext::with_base("http://example.com/feed/").child_with_base("items/");
        assert_eq!(nested.base(), Some("http://example.com/feed/items/"));
    }

    // ---- resolve_url edge cases ----

    #[test]
    fn test_fragment_preservation() {
        check_resolution(
            Some("http://example.com/page.html"),
            &[("#section", "http://example.com/page.html#section")],
        );
    }

    #[test]
    fn test_query_string_preservation() {
        check_resolution(
            Some("http://example.com/page.html"),
            &[("?query=value", "http://example.com/page.html?query=value")],
        );
    }

    #[test]
    fn test_empty_href() {
        check_resolution(
            Some("http://example.com/page.html"),
            &[("", "http://example.com/page.html")],
        );
    }

    // ---- is_safe_url ----

    #[test]
    fn test_is_safe_url_file_scheme() {
        check_rejected(&[
            "file:///etc/passwd",
            "file:///C:/Windows/System32/config/sam",
        ]);
    }

    #[test]
    fn test_is_safe_url_localhost() {
        check_rejected(&[
            "http://localhost/",
            "http://127.0.0.1/",
            "http://[::1]/",
            "https://localhost:8080/api",
        ]);
    }

    #[test]
    fn test_is_safe_url_private_ip() {
        // 192.168.0.0/16
        check_rejected(&[
            "http://192.168.1.1/",
            "http://192.168.0.1/",
            "http://192.168.255.255/",
        ]);
        // 10.0.0.0/8
        check_rejected(&["http://10.0.0.1/", "http://10.255.255.255/"]);
        // 172.16.0.0/12
        check_rejected(&[
            "http://172.16.0.1/",
            "http://172.31.255.255/",
            "http://172.20.10.5/",
        ]);
        // 127.0.0.0/8
        check_rejected(&["http://127.0.0.2/", "http://127.255.255.255/"]);
    }

    #[test]
    fn test_is_safe_url_cloud_metadata() {
        check_rejected(&[
            "http://169.254.169.254/",
            "http://169.254.169.254/latest/meta-data/",
            "http://metadata.google.internal/",
        ]);
    }

    #[test]
    fn test_is_safe_url_valid_urls() {
        check_accepted(&[
            "http://example.com/",
            "https://github.com/",
            "http://1.1.1.1/",
            "https://8.8.8.8/",
            "http://example.com:8080/path",
        ]);
    }

    #[test]
    fn test_is_safe_url_other_schemes() {
        check_rejected(&[
            "ftp://example.com/",
            "data:text/html,<script>alert('xss')</script>",
            "javascript:alert('xss')",
            "gopher://example.com/",
        ]);
    }

    #[test]
    fn test_is_safe_url_ipv6() {
        // Loopback, in short and fully expanded form.
        check_rejected(&["http://[::1]/", "http://[0:0:0:0:0:0:0:1]/"]);
        // fc00::/7
        check_rejected(&["http://[fc00::1]/", "http://[fd00::1]/"]);
        // A public IPv6 address is accepted.
        check_accepted(&["http://[2001:4860:4860::8888]/"]);
    }

    #[test]
    fn test_is_safe_url_invalid_urls() {
        check_rejected(&["not a url", "", "://invalid"]);
    }
}