// hexz_core/store/utils.rs

1//! URL validation utilities for storage backends.
2//!
3//! This module provides security-focused URL validation functions used by
4//! HTTP and S3 storage backends to prevent SSRF (Server-Side Request Forgery)
5//! attacks. It detects and blocks access to internal networks, loopback addresses,
6//! and cloud metadata endpoints.
7
8use hexz_common::{Error, Result};
9use std::io::{Error as IoError, ErrorKind};
10use std::net::{IpAddr, ToSocketAddrs};
11use url::{Host, Url};
12
/// Checks if an IP address belongs to a restricted range.
///
/// Restricted ranges include:
/// - **IPv4**:
///   - "This network" / unspecified: 0.0.0.0/8
///   - Loopback: 127.0.0.0/8
///   - Private: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
///   - Link-local/metadata: 169.254.0.0/16 (AWS metadata: 169.254.169.254)
/// - **IPv6**:
///   - Unspecified: ::
///   - Loopback: ::1
///   - Unique local: fc00::/7
///   - Link-local: fe80::/10
///   - IPv4-mapped (`::ffff:a.b.c.d`): restricted whenever the embedded IPv4
///     address is restricted by the IPv4 rules above
///
/// # Parameters
///
/// - `ip`: The IP address to check
///
/// # Returns
///
/// `true` if the IP is in a restricted range, `false` otherwise.
///
/// # Examples
///
/// ```
/// use std::net::IpAddr;
/// use hexz_core::store::utils::is_restricted_ip;
///
/// // Loopback is restricted
/// assert!(is_restricted_ip("127.0.0.1".parse::<IpAddr>().unwrap()));
///
/// // Private network is restricted
/// assert!(is_restricted_ip("192.168.1.1".parse::<IpAddr>().unwrap()));
///
/// // IPv4-mapped IPv6 form of a restricted address is restricted too
/// assert!(is_restricted_ip("::ffff:127.0.0.1".parse::<IpAddr>().unwrap()));
///
/// // Public IP is not restricted
/// assert!(!is_restricted_ip("8.8.8.8".parse::<IpAddr>().unwrap()));
/// ```
pub fn is_restricted_ip(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V4(ipv4) => {
            // 0.0.0.0/8 is checked by first octet because `is_unspecified()`
            // only matches the single address 0.0.0.0.
            ipv4.octets()[0] == 0
                // 127.0.0.0/8 (Loopback)
                || ipv4.is_loopback()
                // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 (Private)
                || ipv4.is_private()
                // 169.254.0.0/16 (Link-Local / Cloud Metadata)
                || ipv4.is_link_local()
        }
        IpAddr::V6(ipv6) => {
            // ::ffff:a.b.c.d carries an IPv4 address; judge it by the IPv4
            // rules so a restricted target cannot be smuggled in as IPv6.
            if let Some(mapped) = ipv6.to_ipv4_mapped() {
                return is_restricted_ip(IpAddr::V4(mapped));
            }
            // :: (Unspecified) and ::1 (Loopback)
            if ipv6.is_unspecified() || ipv6.is_loopback() {
                return true;
            }
            let prefix = ipv6.segments()[0];
            // fc00::/7 (Unique Local) or fe80::/10 (Link-Local)
            (prefix & 0xfe00) == 0xfc00 || (prefix & 0xffc0) == 0xfe80
        }
    }
}
91
92/// Validates and sanitizes a URL for safe remote access.
93///
94/// This function performs comprehensive validation to prevent SSRF attacks:
95/// 1. Parses the URL and checks scheme (only HTTP/HTTPS allowed)
96/// 2. Extracts the hostname
97/// 3. Resolves domain names to IP addresses via DNS
98/// 4. Checks all resolved IPs against restricted ranges
99/// 5. Returns the sanitized URL if validation passes
100///
101/// # Parameters
102///
103/// - `url_str`: The URL string to validate
104/// - `allow_restricted`: If `true`, skips IP restriction checks (dangerous!)
105///
106/// # Returns
107///
108/// - `Ok(String)`: The validated and normalized URL
109/// - `Err(Error::Io)`: If URL is malformed, uses invalid scheme, or points to restricted IP
110///
111/// # Security
112///
113/// Always use `allow_restricted: false` in production unless you have a specific
114/// trusted environment. Allowing restricted IPs can enable:
115/// - Access to cloud metadata endpoints (AWS: 169.254.169.254)
116/// - Internal service discovery and enumeration
117/// - Port scanning of private networks
118///
119/// # Examples
120///
121/// ```
122/// use hexz_core::store::utils::validate_url;
123///
124/// // Valid public URL
125/// assert!(validate_url("https://example.com/file.hxz", false).is_ok());
126///
127/// // Invalid scheme
128/// assert!(validate_url("ftp://example.com/file.hxz", false).is_err());
129///
130/// // Restricted IP (blocked by default)
131/// assert!(validate_url("http://127.0.0.1/file.hxz", false).is_err());
132///
133/// // Restricted IP (allowed with flag)
134/// assert!(validate_url("http://127.0.0.1/file.hxz", true).is_ok());
135/// ```
136pub fn validate_url(url_str: &str, allow_restricted: bool) -> Result<String> {
137    let url = Url::parse(url_str).map_err(|e| {
138        Error::Io(IoError::new(
139            ErrorKind::InvalidInput,
140            format!("Invalid URL: {}", e),
141        ))
142    })?;
143
144    if url.scheme() != "http" && url.scheme() != "https" {
145        return Err(Error::Io(IoError::new(
146            ErrorKind::InvalidInput,
147            "Only HTTP and HTTPS schemes are allowed",
148        )));
149    }
150
151    // If restricted IPs are allowed, we can skip the IP checks
152    if allow_restricted {
153        return Ok(url.to_string());
154    }
155
156    let host = url
157        .host()
158        .ok_or_else(|| Error::Io(IoError::new(ErrorKind::InvalidInput, "URL missing host")))?;
159
160    match host {
161        Host::Ipv4(ip) => {
162            if is_restricted_ip(IpAddr::V4(ip)) {
163                return Err(Error::Io(IoError::new(
164                    ErrorKind::PermissionDenied,
165                    format!("Access to internal/private IP denied: {}", ip),
166                )));
167            }
168        }
169        Host::Ipv6(ip) => {
170            if is_restricted_ip(IpAddr::V6(ip)) {
171                return Err(Error::Io(IoError::new(
172                    ErrorKind::PermissionDenied,
173                    format!("Access to internal/private IP denied: {}", ip),
174                )));
175            }
176        }
177        Host::Domain(domain) => {
178            // Defensive: Strip brackets if they somehow ended up in the domain string
179            let clean_domain = if domain.starts_with('[') && domain.ends_with(']') {
180                &domain[1..domain.len() - 1]
181            } else {
182                domain
183            };
184
185            // Try parsing as IP first to avoid DNS lookup for literals
186            if let Ok(ip) = clean_domain.parse::<IpAddr>() {
187                if is_restricted_ip(ip) {
188                    return Err(Error::Io(IoError::new(
189                        ErrorKind::PermissionDenied,
190                        format!("Access to internal/private IP denied: {}", ip),
191                    )));
192                }
193                return Ok(url.to_string());
194            }
195
196            let port = url.port_or_known_default().unwrap_or(80);
197
198            let addrs = (clean_domain, port).to_socket_addrs().map_err(|e| {
199                Error::Io(IoError::other(format!(
200                    "DNS resolution failed for domain '{}': {}",
201                    clean_domain, e
202                )))
203            })?;
204
205            for addr in addrs {
206                if is_restricted_ip(addr.ip()) {
207                    return Err(Error::Io(IoError::new(
208                        ErrorKind::PermissionDenied,
209                        format!("Access to internal/private IP denied: {}", addr.ip()),
210                    )));
211                }
212            }
213        }
214    }
215
216    Ok(url.to_string())
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{Ipv4Addr, Ipv6Addr};

    /// Parses an IPv4 literal into an `IpAddr`; panics on invalid test data.
    fn v4(text: &str) -> IpAddr {
        IpAddr::V4(text.parse::<Ipv4Addr>().unwrap())
    }

    /// Parses an IPv6 literal into an `IpAddr`; panics on invalid test data.
    fn v6(text: &str) -> IpAddr {
        IpAddr::V6(text.parse::<Ipv6Addr>().unwrap())
    }

    // ---- is_restricted_ip() ----

    #[test]
    fn test_ipv4_loopback_is_restricted() {
        for text in ["127.0.0.1", "127.0.0.2", "127.1.1.1", "127.255.255.255"] {
            assert!(
                is_restricted_ip(v4(text)),
                "Loopback IP {} should be restricted",
                text
            );
        }
    }

    #[test]
    fn test_ipv4_private_10_is_restricted() {
        for text in ["10.0.0.0", "10.0.0.1", "10.255.255.255"] {
            assert!(
                is_restricted_ip(v4(text)),
                "Private IP {} should be restricted",
                text
            );
        }
    }

    #[test]
    fn test_ipv4_private_172_16_31_is_restricted() {
        for text in ["172.16.0.0", "172.16.0.1", "172.20.0.1", "172.31.255.255"] {
            assert!(
                is_restricted_ip(v4(text)),
                "Private IP {} should be restricted",
                text
            );
        }

        // Just outside the /12 on either side: 172.15.x and 172.32.x are public.
        assert!(!is_restricted_ip(v4("172.15.255.255")));
        assert!(!is_restricted_ip(v4("172.32.0.0")));
    }

    #[test]
    fn test_ipv4_private_192_168_is_restricted() {
        for text in ["192.168.0.0", "192.168.1.1", "192.168.255.255"] {
            assert!(
                is_restricted_ip(v4(text)),
                "Private IP {} should be restricted",
                text
            );
        }

        // Neighbouring /16 blocks (192.167 and 192.169) are public.
        assert!(!is_restricted_ip(v4("192.167.0.0")));
        assert!(!is_restricted_ip(v4("192.169.0.0")));
    }

    #[test]
    fn test_ipv4_link_local_is_restricted() {
        let link_local = [
            "169.254.0.0",
            "169.254.169.254", // AWS metadata endpoint
            "169.254.255.255",
        ];
        for text in link_local {
            assert!(
                is_restricted_ip(v4(text)),
                "Link-local IP {} should be restricted",
                text
            );
        }

        // Addresses immediately below and above 169.254.0.0/16 are public.
        assert!(!is_restricted_ip(v4("169.253.255.255")));
        assert!(!is_restricted_ip(v4("169.255.0.0")));
    }

    #[test]
    fn test_ipv4_public_is_not_restricted() {
        let public = [
            "8.8.8.8",       // Google DNS
            "1.1.1.1",       // Cloudflare DNS
            "93.184.216.34", // example.com
            "151.101.1.140", // Reddit
            "13.107.42.14",  // Microsoft
        ];
        for text in public {
            assert!(
                !is_restricted_ip(v4(text)),
                "Public IP {} should NOT be restricted",
                text
            );
        }
    }

    #[test]
    fn test_ipv6_loopback_is_restricted() {
        assert!(is_restricted_ip(v6("::1")));
    }

    #[test]
    fn test_ipv6_unique_local_is_restricted() {
        let unique_local = [
            "fc00::",
            "fc00::1",
            "fd00::1",
            "fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
        ];
        for text in unique_local {
            assert!(
                is_restricted_ip(v6(text)),
                "Unique local IPv6 {} should be restricted",
                text
            );
        }
    }

    #[test]
    fn test_ipv6_link_local_is_restricted() {
        let link_local = [
            "fe80::",
            "fe80::1",
            "febf:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
        ];
        for text in link_local {
            assert!(
                is_restricted_ip(v6(text)),
                "Link-local IPv6 {} should be restricted",
                text
            );
        }
    }

    #[test]
    fn test_ipv6_public_is_not_restricted() {
        let public = [
            "2001:4860:4860::8888", // Google DNS
            "2606:4700:4700::1111", // Cloudflare DNS
            "2001:db8::1",          // Documentation prefix
        ];
        for text in public {
            assert!(
                !is_restricted_ip(v6(text)),
                "Public IPv6 {} should NOT be restricted",
                text
            );
        }
    }

    // ---- validate_url() ----

    #[test]
    fn test_validate_url_valid_https() {
        let outcome = validate_url("https://example.com/file.hxz", false);
        assert!(outcome.is_ok(), "HTTPS URL should be valid");
    }

    #[test]
    fn test_validate_url_valid_http() {
        let outcome = validate_url("http://example.com/file.hxz", false);
        assert!(outcome.is_ok(), "HTTP URL should be valid");
    }

    #[test]
    fn test_validate_url_invalid_scheme_ftp() {
        let outcome = validate_url("ftp://example.com/file.hxz", false);
        assert!(outcome.is_err(), "FTP scheme should be rejected");
        // The rejection message is expected to mention the allowed schemes.
        let message = outcome.unwrap_err().to_string().to_lowercase();
        assert!(message.contains("http"));
    }

    #[test]
    fn test_validate_url_invalid_scheme_file() {
        let outcome = validate_url("file:///etc/passwd", false);
        assert!(outcome.is_err(), "file:// scheme should be rejected");
    }

    #[test]
    fn test_validate_url_malformed() {
        let outcome = validate_url("not a url", false);
        assert!(outcome.is_err(), "Malformed URL should be rejected");
    }

    #[test]
    fn test_validate_url_missing_host() {
        let outcome = validate_url("http://", false);
        assert!(outcome.is_err(), "URL without host should be rejected");
    }

    #[test]
    fn test_validate_url_ipv4_loopback_blocked() {
        let outcome = validate_url("http://127.0.0.1/file.hxz", false);
        assert!(outcome.is_err(), "Loopback IP should be blocked");
        let message = outcome.unwrap_err().to_string().to_lowercase();
        assert!(message.contains("denied"));
    }

    #[test]
    fn test_validate_url_ipv4_private_blocked() {
        let private_urls = [
            "http://10.0.0.1/file.hxz",
            "http://172.16.0.1/file.hxz",
            "http://192.168.1.1/file.hxz",
        ];
        for url in private_urls {
            let outcome = validate_url(url, false);
            assert!(outcome.is_err(), "Private IP {} should be blocked", url);
        }
    }

    #[test]
    fn test_validate_url_ipv4_link_local_blocked() {
        let outcome = validate_url("http://169.254.169.254/latest/meta-data", false);
        assert!(outcome.is_err(), "AWS metadata endpoint should be blocked");
    }

    #[test]
    fn test_validate_url_ipv6_loopback_blocked() {
        let outcome = validate_url("http://[::1]/file.hxz", false);
        assert!(outcome.is_err(), "IPv6 loopback should be blocked");
    }

    #[test]
    fn test_validate_url_ipv6_unique_local_blocked() {
        let outcome = validate_url("http://[fc00::1]/file.hxz", false);
        assert!(outcome.is_err(), "IPv6 unique local should be blocked");
    }

    #[test]
    fn test_validate_url_ipv6_link_local_blocked() {
        let outcome = validate_url("http://[fe80::1]/file.hxz", false);
        assert!(outcome.is_err(), "IPv6 link-local should be blocked");
    }

    #[test]
    fn test_validate_url_allow_restricted_flag() {
        let restricted_urls = [
            "http://127.0.0.1/file.hxz",
            "http://10.0.0.1/file.hxz",
            "http://192.168.1.1/file.hxz",
            "http://[::1]/file.hxz",
        ];
        for url in restricted_urls {
            let outcome = validate_url(url, true);
            assert!(
                outcome.is_ok(),
                "Restricted IP {} should be allowed with flag",
                url
            );
        }
    }

    #[test]
    fn test_validate_url_normalized_output() {
        let outcome = validate_url("https://example.com:443/path?query=value", false);
        assert!(outcome.is_ok());
        // The normalized string must still carry the host and the path.
        let normalized = outcome.unwrap();
        assert!(normalized.contains("example.com"));
        assert!(normalized.contains("path"));
    }

    #[test]
    fn test_validate_url_domain_with_port() {
        let outcome = validate_url("https://example.com:8080/file.hxz", false);
        assert!(outcome.is_ok(), "URL with custom port should be valid");
    }

    #[test]
    fn test_validate_url_with_path_and_query() {
        let outcome = validate_url("https://example.com/path/to/file.st?key=value", false);
        assert!(outcome.is_ok(), "URL with path and query should be valid");
    }

    #[test]
    fn test_validate_url_localhost_blocked() {
        // Environment-dependent: "localhost" normally resolves to a loopback
        // address (blocked); if it does not resolve at all, validation fails on
        // DNS instead. Both outcomes are an error, which is all that is asserted.
        let outcome = validate_url("http://localhost/file.hxz", false);
        assert!(
            outcome.is_err(),
            "localhost should typically be blocked or fail resolution"
        );
    }

    #[test]
    fn test_validate_url_empty_string() {
        let outcome = validate_url("", false);
        assert!(outcome.is_err(), "Empty URL should be rejected");
    }

    #[test]
    fn test_validate_url_brackets_in_domain() {
        // Runs the domain branch with a plain hostname; the input itself
        // contains no brackets, so only the pass-through case is covered.
        let outcome = validate_url("http://example.com/file.hxz", false);
        assert!(outcome.is_ok());
    }
}