Skip to main content

graphify_security/
url_validator.rs

1//! URL validation and SSRF prevention.
2
3use url::Url;
4
5use crate::SecurityError;
6
/// Largest number of bytes a single fetch may return: 50 MiB (52,428,800 bytes).
pub const MAX_FETCH_SIZE: usize = 50 << 20; // 1 << 20 bytes == 1 MiB
9
/// Largest size considered "safe" for in-memory processing: 10 MiB (10,485,760 bytes).
pub const MAX_SAFE_SIZE: usize = 10 << 20; // 1 << 20 bytes == 1 MiB
12
13/// Validate a URL: must be http/https, must not resolve to private/localhost IPs.
14///
15/// Note: this is a static check only. It does not protect against DNS rebinding
16/// attacks where a public hostname resolves to a private IP at request time.
17/// For full SSRF protection, also check the resolved IP after DNS lookup.
18pub fn validate_url(url_str: &str) -> Result<Url, SecurityError> {
19    let url = Url::parse(url_str)?;
20
21    if url.scheme() != "http" && url.scheme() != "https" {
22        return Err(SecurityError::InvalidScheme(url.scheme().to_string()));
23    }
24
25    if let Some(host) = url.host_str() {
26        if is_private_host(host) {
27            return Err(SecurityError::PrivateIp(host.to_string()));
28        }
29    } else {
30        return Err(SecurityError::PrivateIp("(no host)".to_string()));
31    }
32
33    Ok(url)
34}
35
36/// Check whether a host string refers to a private or reserved address.
37fn is_private_host(host: &str) -> bool {
38    if host == "localhost" {
39        return true;
40    }
41
42    if let Ok(ip) = host
43        .trim_start_matches('[')
44        .trim_end_matches(']')
45        .parse::<std::net::IpAddr>()
46    {
47        return ip_is_private(&ip);
48    }
49
50    if let Some(ipv4) = parse_nonstandard_ipv4(host) {
51        return ip_is_private(&std::net::IpAddr::V4(ipv4));
52    }
53
54    false
55}
56
/// Check if an IP address is private, loopback, link-local, or reserved.
///
/// IPv4 addresses are rejected when they fall in any of:
/// loopback (127.0.0.0/8), the private ranges matched by
/// `Ipv4Addr::is_private`, link-local (169.254.0.0/16), "this network"
/// (0.0.0.0/8), shared address space (100.64.0.0/10), benchmarking
/// (198.18.0.0/15), multicast (224.0.0.0/4), or reserved space including
/// broadcast (240.0.0.0/4).
///
/// IPv6 addresses are rejected when they are loopback (`::1`), unspecified
/// (`::`), multicast (`ff00::/8`), unique-local (`fc00::/7`), or link-local
/// (`fe80::/10`). An IPv4-mapped address (`::ffff:a.b.c.d`) is classified by
/// the IPv4 address it embeds, so `::ffff:127.0.0.1` cannot be used to slip a
/// loopback target past the IPv6 rules.
fn ip_is_private(ip: &std::net::IpAddr) -> bool {
    match ip {
        std::net::IpAddr::V4(v4) => {
            let first_octet = v4.octets()[0];
            v4.is_loopback()
                || v4.is_private()
                || v4.is_link_local()
                // 0.0.0.0/8 ("this network"); also covers the unspecified address.
                || first_octet == 0
                || v4.is_multicast()
                // 240.0.0.0/4 is reserved and contains the broadcast address.
                || first_octet >= 240
                // 100.64.0.0/10: shared address space (carrier-grade NAT).
                || is_in_range(
                    v4,
                    &std::net::Ipv4Addr::new(100, 64, 0, 0),
                    &std::net::Ipv4Addr::new(100, 127, 255, 255),
                )
                // 198.18.0.0/15: network benchmarking.
                || is_in_range(
                    v4,
                    &std::net::Ipv4Addr::new(198, 18, 0, 0),
                    &std::net::Ipv4Addr::new(198, 19, 255, 255),
                )
        }
        std::net::IpAddr::V6(v6) => {
            // An IPv4-mapped address reaches the embedded IPv4 host, so it must
            // be judged by the IPv4 rules rather than the IPv6 ones.
            if let Some(mapped) = v6.to_ipv4_mapped() {
                return ip_is_private(&std::net::IpAddr::V4(mapped));
            }

            let octets = v6.octets();
            v6.is_loopback()
                || v6.is_unspecified()
                || v6.is_multicast()
                // fc00::/7: unique-local addresses.
                || (octets[0] & 0xfe) == 0xfc
                // fe80::/10: link-local addresses.
                || (octets[0] == 0xfe && (octets[1] & 0xc0) == 0x80)
        }
    }
}

/// Return `true` when `ip` lies in the inclusive range `start..=end`,
/// comparing the addresses by their 32-bit numeric value.
fn is_in_range(
    ip: &std::net::Ipv4Addr,
    start: &std::net::Ipv4Addr,
    end: &std::net::Ipv4Addr,
) -> bool {
    (u32::from(*start)..=u32::from(*end)).contains(&u32::from(*ip))
}
93
/// Try parsing non-standard IPv4 representations (decimal, hex, octal).
///
/// Accepts the `inet_aton`-style spellings that the strict standard-library
/// parser rejects but that many resolvers and HTTP clients still honour:
///
/// * one to four dot-separated numbers (`2130706433`, `127.1`, `10.0.513`);
/// * each number in decimal, hexadecimal (`0x`/`0X` prefix), or octal
///   (leading `0`), e.g. `0x7f000001`, `017700000001`, `0177.0.0.1`;
/// * an optional single trailing dot.
///
/// Every number except the last must fit in one octet; the last number fills
/// all remaining octets. Returns `None` when the host is not such a numeric
/// form or when any number is out of range.
fn parse_nonstandard_ipv4(host: &str) -> Option<std::net::Ipv4Addr> {
    /// Parse one dot-separated component, choosing the radix from its prefix.
    fn parse_component(part: &str) -> Option<u32> {
        if let Some(hex) = part.strip_prefix("0x").or_else(|| part.strip_prefix("0X")) {
            u32::from_str_radix(hex, 16).ok()
        } else if let Some(octal) = part.strip_prefix('0').filter(|rest| !rest.is_empty()) {
            // A leading zero followed by more digits means octal.
            u32::from_str_radix(octal, 8).ok()
        } else {
            part.parse::<u32>().ok()
        }
    }

    let host = host.strip_suffix('.').unwrap_or(host);

    // Any component that is not a number makes the whole host a non-IP name.
    let components = host
        .split('.')
        .map(parse_component)
        .collect::<Option<Vec<u32>>>()?;
    let (&last, leading) = components.split_last()?;

    // The final component supplies every octet not given by a leading one.
    let tail_bits: u32 = match leading.len() {
        0 => 32,
        1 => 24,
        2 => 16,
        3 => 8,
        _ => return None,
    };

    if leading.iter().any(|&octet| octet > 0xff) {
        return None;
    }
    // Widen before shifting so a 32-bit shift is well defined.
    if u64::from(last) >> tail_bits != 0 {
        return None;
    }

    let prefix = leading.iter().fold(0u32, |acc, &octet| (acc << 8) | octet);
    // With no leading octets the final component is the whole address, and
    // shifting a u32 by 32 bits would overflow, so skip the shift entirely.
    let high = if leading.is_empty() {
        0
    } else {
        prefix << tail_bits
    };

    Some(std::net::Ipv4Addr::from(high | last))
}
106
/// Unit tests for `validate_url`: accepted URLs, scheme rejection, and
/// rejection of every private/reserved address family the validator checks.
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `input` is rejected with `SecurityError::PrivateIp`.
    fn assert_private_rejected(input: &str) {
        let result = validate_url(input);
        assert!(
            matches!(result, Err(SecurityError::PrivateIp(_))),
            "expected PrivateIp rejection for {}",
            input
        );
    }

    #[test]
    fn test_valid_https_url() {
        let result = validate_url("https://example.com/page");
        assert!(result.is_ok());
        assert_eq!(result.unwrap().host_str(), Some("example.com"));
    }

    #[test]
    fn test_valid_http_url() {
        let result = validate_url("http://example.com");
        assert!(result.is_ok());
    }

    #[test]
    fn test_reject_ftp_scheme() {
        let result = validate_url("ftp://example.com/file");
        assert!(matches!(result, Err(SecurityError::InvalidScheme(_))));
    }

    #[test]
    fn test_reject_file_scheme() {
        let result = validate_url("file:///etc/passwd");
        assert!(matches!(result, Err(SecurityError::InvalidScheme(_))));
    }

    #[test]
    fn test_reject_javascript_scheme() {
        let result = validate_url("javascript:alert(1)");
        assert!(result.is_err());
    }

    #[test]
    fn test_reject_localhost() {
        assert_private_rejected("http://localhost:8080/api");
    }

    #[test]
    fn test_reject_127() {
        assert_private_rejected("http://127.0.0.1/admin");
    }

    #[test]
    fn test_reject_10_network() {
        assert_private_rejected("http://10.0.0.1/internal");
    }

    #[test]
    fn test_reject_192_168() {
        assert_private_rejected("http://192.168.1.1/router");
    }

    #[test]
    fn test_reject_172_16() {
        assert_private_rejected("http://172.16.0.1/secret");
    }

    #[test]
    fn test_reject_172_31() {
        assert_private_rejected("http://172.31.255.255/secret");
    }

    #[test]
    fn test_allow_172_32() {
        // First address past the 172.16.0.0/12 private block.
        let result = validate_url("http://172.32.0.1/public");
        assert!(result.is_ok());
    }

    #[test]
    fn test_reject_link_local() {
        // Cloud metadata endpoint, the classic SSRF target.
        assert_private_rejected("http://169.254.169.254/latest/meta-data/");
    }

    #[test]
    fn test_reject_shared_address_space() {
        // 100.64.0.0/10 (carrier-grade NAT), both ends of the range.
        assert_private_rejected("http://100.64.0.1/");
        assert_private_rejected("http://100.127.255.254/");
    }

    #[test]
    fn test_allow_outside_shared_address_space() {
        // First address past 100.64.0.0/10.
        let result = validate_url("http://100.128.0.1/");
        assert!(result.is_ok());
    }

    #[test]
    fn test_reject_benchmarking_range() {
        // 198.18.0.0/15.
        assert_private_rejected("http://198.18.0.1/");
        assert_private_rejected("http://198.19.255.254/");
    }

    #[test]
    fn test_reject_ipv6_loopback() {
        assert_private_rejected("http://[::1]/");
    }

    #[test]
    fn test_reject_ipv6_unique_local() {
        // fc00::/7 covers both fc.. and fd.. prefixes.
        assert_private_rejected("http://[fc00::1]/");
        assert_private_rejected("http://[fd00::1]/");
    }

    #[test]
    fn test_reject_ipv6_link_local() {
        assert_private_rejected("http://[fe80::1]/");
    }

    #[test]
    fn test_reject_zero_ip() {
        assert_private_rejected("http://0.0.0.0/");
    }

    #[test]
    fn test_invalid_url() {
        let result = validate_url("not a url at all");
        assert!(result.is_err());
    }

    #[test]
    fn test_constants() {
        assert_eq!(MAX_FETCH_SIZE, 50 * 1024 * 1024);
        assert_eq!(MAX_SAFE_SIZE, 10 * 1024 * 1024);
    }
}