feedparser_rs/util/
base_url.rs1use std::net::IpAddr;
7use url::Url;
8
9#[must_use]
41pub fn is_safe_url(url: &str) -> bool {
42 let Ok(parsed) = Url::parse(url) else {
43 return false;
44 };
45
46 match parsed.scheme() {
48 "http" | "https" => {}
49 _ => return false,
50 }
51
52 if let Some(host) = parsed.host() {
54 match host {
55 url::Host::Domain(domain) => {
56 if domain == "localhost" {
58 return false;
59 }
60
61 if domain == "metadata.google.internal" {
63 return false;
64 }
65 }
66 url::Host::Ipv4(ipv4) => {
67 let ip = IpAddr::V4(ipv4);
68 if ip.is_loopback() || is_private_ip(&ip) {
70 return false;
71 }
72
73 let octets = ipv4.octets();
75 if octets == [169, 254, 169, 254] {
76 return false;
77 }
78 }
79 url::Host::Ipv6(ipv6) => {
80 let ip = IpAddr::V6(ipv6);
81 if ip.is_loopback() || is_private_ip(&ip) {
83 return false;
84 }
85 }
86 }
87 }
88
89 true
90}
91
/// Returns `true` when `ip` belongs to a range that should never be reached
/// through a user-supplied URL.
///
/// IPv4: private (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`), loopback
/// (`127.0.0.0/8`), link-local (`169.254.0.0/16`) and the unspecified address
/// `0.0.0.0`.
///
/// IPv6: loopback (`::1`), unspecified (`::`), unique-local (`fc00::/7`),
/// link-local (`fe80::/10`), and any IPv4-mapped address (`::ffff:a.b.c.d`)
/// whose embedded IPv4 address is itself in one of the IPv4 ranges above.
fn is_private_ip(ip: &IpAddr) -> bool {
    match ip {
        IpAddr::V4(ipv4) => is_internal_ipv4(*ipv4),
        IpAddr::V6(ipv6) => {
            // `::ffff:127.0.0.1` reaches the same host as `127.0.0.1`, so an
            // IPv4-mapped address is judged by the IPv4 address it embeds.
            if let Some(mapped) = ipv6.to_ipv4_mapped() {
                return is_internal_ipv4(mapped);
            }

            let first_segment = ipv6.segments()[0];
            ipv6.is_loopback()
                || ipv6.is_unspecified()
                // fc00::/7 (unique-local)
                || (first_segment & 0xfe00) == 0xfc00
                // fe80::/10 (link-local)
                || (first_segment & 0xffc0) == 0xfe80
        }
    }
}

/// IPv4 half of `is_private_ip`: private, loopback, link-local or unspecified.
fn is_internal_ipv4(ipv4: std::net::Ipv4Addr) -> bool {
    ipv4.is_private() || ipv4.is_loopback() || ipv4.is_link_local() || ipv4.is_unspecified()
}
107
108#[must_use]
144pub fn resolve_url(href: &str, base: Option<&str>) -> String {
145 if href.starts_with("http://")
147 || href.starts_with("https://")
148 || href.starts_with("mailto:")
149 || href.starts_with("tel:")
150 {
151 return href.to_string();
152 }
153
154 let Some(base_str) = base else {
156 return href.to_string();
157 };
158
159 let Ok(base_url) = Url::parse(base_str) else {
161 return href.to_string();
162 };
163
164 base_url
166 .join(href)
167 .map_or_else(|_| href.to_string(), |resolved| resolved.to_string())
168}
169
170#[must_use]
211pub fn combine_bases(parent_base: Option<&str>, child_base: Option<&str>) -> Option<String> {
212 match (parent_base, child_base) {
213 (_, Some(child)) => {
214 Some(resolve_url(child, parent_base))
216 }
217 (Some(parent), None) => Some(parent.to_string()),
218 (None, None) => None,
219 }
220}
221
222#[derive(Debug, Clone, Default)]
227pub struct BaseUrlContext {
228 base: Option<String>,
230}
231
232impl BaseUrlContext {
233 #[must_use]
235 pub const fn new() -> Self {
236 Self { base: None }
237 }
238
239 #[must_use]
241 pub fn with_base(base: impl Into<String>) -> Self {
242 Self {
243 base: Some(base.into()),
244 }
245 }
246
247 #[must_use]
249 pub fn base(&self) -> Option<&str> {
250 self.base.as_deref()
251 }
252
253 pub fn update_base(&mut self, xml_base: &str) {
257 let new_base = resolve_url(xml_base, self.base.as_deref());
258 self.base = Some(new_base);
259 }
260
261 #[must_use]
263 pub fn resolve(&self, href: &str) -> String {
264 resolve_url(href, self.base.as_deref())
265 }
266
267 #[must_use]
269 pub fn child(&self) -> Self {
270 Self {
271 base: self.base.clone(),
272 }
273 }
274
275 #[must_use]
277 pub fn child_with_base(&self, xml_base: &str) -> Self {
278 let new_base = combine_bases(self.base.as_deref(), Some(xml_base));
279 Self { base: new_base }
280 }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(href, base, expected)` case resolves as stated.
    fn assert_resolves(cases: &[(&str, Option<&str>, &str)]) {
        for &(href, base, expected) in cases {
            assert_eq!(
                resolve_url(href, base),
                expected,
                "href={href:?} base={base:?}"
            );
        }
    }

    /// Asserts that `is_safe_url` rejects every URL in `urls`.
    fn assert_rejected(urls: &[&str]) {
        for url in urls {
            assert!(!is_safe_url(url), "expected {url:?} to be rejected");
        }
    }

    /// Asserts that `is_safe_url` accepts every URL in `urls`.
    fn assert_accepted(urls: &[&str]) {
        for url in urls {
            assert!(is_safe_url(url), "expected {url:?} to be accepted");
        }
    }

    #[test]
    fn test_resolve_absolute_url() {
        assert_resolves(&[
            (
                "http://example.com/page",
                Some("http://other.com/"),
                "http://example.com/page",
            ),
            (
                "https://example.com/page",
                Some("http://other.com/"),
                "https://example.com/page",
            ),
        ]);
    }

    #[test]
    fn test_resolve_relative_url() {
        assert_resolves(&[
            (
                "page.html",
                Some("http://example.com/dir/"),
                "http://example.com/dir/page.html",
            ),
            (
                "/absolute/path",
                Some("http://example.com/dir/"),
                "http://example.com/absolute/path",
            ),
            (
                "../sibling/page",
                Some("http://example.com/dir/sub/"),
                "http://example.com/dir/sibling/page",
            ),
        ]);
    }

    #[test]
    fn test_resolve_without_base() {
        assert_resolves(&[
            ("page.html", None, "page.html"),
            ("http://example.com", None, "http://example.com"),
        ]);
    }

    #[test]
    fn test_resolve_invalid_base() {
        assert_resolves(&[("page.html", Some("not a valid url"), "page.html")]);
    }

    #[test]
    fn test_resolve_special_schemes() {
        assert_resolves(&[
            (
                "mailto:test@example.com",
                Some("http://example.com/"),
                "mailto:test@example.com",
            ),
            (
                "tel:+1234567890",
                Some("http://example.com/"),
                "tel:+1234567890",
            ),
        ]);
    }

    #[test]
    fn test_combine_bases_child_absolute() {
        let combined = combine_bases(Some("http://parent.com/"), Some("http://child.com/"));
        assert_eq!(combined.as_deref(), Some("http://child.com/"));
    }

    #[test]
    fn test_combine_bases_child_relative() {
        let combined = combine_bases(Some("http://example.com/feed/"), Some("items/"));
        assert_eq!(combined.as_deref(), Some("http://example.com/feed/items/"));
    }

    #[test]
    fn test_combine_bases_no_child() {
        let combined = combine_bases(Some("http://example.com/"), None);
        assert_eq!(combined.as_deref(), Some("http://example.com/"));
    }

    #[test]
    fn test_combine_bases_no_parent() {
        let combined = combine_bases(None, Some("http://example.com/"));
        assert_eq!(combined.as_deref(), Some("http://example.com/"));
    }

    #[test]
    fn test_combine_bases_none() {
        assert_eq!(combine_bases(None, None), None);
    }

    #[test]
    fn test_context_new() {
        let ctx = BaseUrlContext::new();
        assert!(ctx.base().is_none());
    }

    #[test]
    fn test_context_with_base() {
        let ctx = BaseUrlContext::with_base("http://example.com/");
        assert_eq!(ctx.base(), Some("http://example.com/"));
    }

    #[test]
    fn test_context_update_base() {
        let mut ctx = BaseUrlContext::with_base("http://example.com/feed/");
        ctx.update_base("items/");
        assert_eq!(ctx.base(), Some("http://example.com/feed/items/"));
    }

    #[test]
    fn test_context_resolve() {
        let ctx = BaseUrlContext::with_base("http://example.com/feed/");
        let relative = ctx.resolve("item.html");
        assert_eq!(relative, "http://example.com/feed/item.html");
        let absolute = ctx.resolve("http://other.com/");
        assert_eq!(absolute, "http://other.com/");
    }

    #[test]
    fn test_context_child() {
        let parent = BaseUrlContext::with_base("http://example.com/");
        let child = parent.child();
        assert_eq!(child.base(), Some("http://example.com/"));
    }

    #[test]
    fn test_context_child_with_base() {
        let parent = BaseUrlContext::with_base("http://example.com/feed/");
        let child = parent.child_with_base("items/");
        assert_eq!(child.base(), Some("http://example.com/feed/items/"));
    }

    #[test]
    fn test_fragment_preservation() {
        assert_resolves(&[(
            "#section",
            Some("http://example.com/page.html"),
            "http://example.com/page.html#section",
        )]);
    }

    #[test]
    fn test_query_string_preservation() {
        assert_resolves(&[(
            "?query=value",
            Some("http://example.com/page.html"),
            "http://example.com/page.html?query=value",
        )]);
    }

    #[test]
    fn test_empty_href() {
        assert_resolves(&[(
            "",
            Some("http://example.com/page.html"),
            "http://example.com/page.html",
        )]);
    }

    #[test]
    fn test_is_safe_url_file_scheme() {
        assert_rejected(&[
            "file:///etc/passwd",
            "file:///C:/Windows/System32/config/sam",
        ]);
    }

    #[test]
    fn test_is_safe_url_localhost() {
        assert_rejected(&[
            "http://localhost/",
            "http://127.0.0.1/",
            "http://[::1]/",
            "https://localhost:8080/api",
        ]);
    }

    #[test]
    fn test_is_safe_url_private_ip() {
        // 192.168.0.0/16
        assert_rejected(&[
            "http://192.168.1.1/",
            "http://192.168.0.1/",
            "http://192.168.255.255/",
        ]);
        // 10.0.0.0/8
        assert_rejected(&["http://10.0.0.1/", "http://10.255.255.255/"]);
        // 172.16.0.0/12
        assert_rejected(&[
            "http://172.16.0.1/",
            "http://172.31.255.255/",
            "http://172.20.10.5/",
        ]);
        // 127.0.0.0/8
        assert_rejected(&["http://127.0.0.2/", "http://127.255.255.255/"]);
    }

    #[test]
    fn test_is_safe_url_cloud_metadata() {
        assert_rejected(&[
            "http://169.254.169.254/",
            "http://169.254.169.254/latest/meta-data/",
            "http://metadata.google.internal/",
        ]);
    }

    #[test]
    fn test_is_safe_url_valid_urls() {
        assert_accepted(&[
            "http://example.com/",
            "https://github.com/",
            "http://1.1.1.1/",
            "https://8.8.8.8/",
            "http://example.com:8080/path",
        ]);
    }

    #[test]
    fn test_is_safe_url_other_schemes() {
        assert_rejected(&[
            "ftp://example.com/",
            "data:text/html,<script>alert('xss')</script>",
            "javascript:alert('xss')",
            "gopher://example.com/",
        ]);
    }

    #[test]
    fn test_is_safe_url_ipv6() {
        // Loopback, short and expanded spellings.
        assert_rejected(&["http://[::1]/", "http://[0:0:0:0:0:0:0:1]/"]);
        // Unique-local fc00::/7.
        assert_rejected(&["http://[fc00::1]/", "http://[fd00::1]/"]);
        // Public address.
        assert_accepted(&["http://[2001:4860:4860::8888]/"]);
    }

    #[test]
    fn test_is_safe_url_invalid_urls() {
        assert_rejected(&["not a url", "", "://invalid"]);
    }
}