Skip to main content

graphify_security/
url_validator.rs

1//! URL validation and SSRF prevention.
2
3use url::Url;
4
5use crate::SecurityError;
6
/// Number of bytes in one mebibyte; the common factor of the size limits below.
const BYTES_PER_MIB: usize = 1024 * 1024;

/// Upper bound on fetch size: 50 MiB (52,428,800 bytes).
pub const MAX_FETCH_SIZE: usize = 50 * BYTES_PER_MIB;

/// Upper bound on what is considered "safe" for in-memory processing:
/// 10 MiB (10,485,760 bytes).
pub const MAX_SAFE_SIZE: usize = 10 * BYTES_PER_MIB;
12
13/// Validate a URL: must be http/https, must not resolve to private/localhost IPs.
14///
15/// Returns the parsed [`Url`] on success.
16pub fn validate_url(url_str: &str) -> Result<Url, SecurityError> {
17    let url = Url::parse(url_str)?;
18
19    // Only allow http/https
20    if url.scheme() != "http" && url.scheme() != "https" {
21        return Err(SecurityError::InvalidScheme(url.scheme().to_string()));
22    }
23
24    // Block private/reserved IPs
25    if let Some(host) = url.host_str() {
26        if is_private_host(host) {
27            return Err(SecurityError::PrivateIp(host.to_string()));
28        }
29    } else {
30        return Err(SecurityError::PrivateIp("(no host)".to_string()));
31    }
32
33    Ok(url)
34}
35
/// Check whether a host string refers to a private or reserved address.
///
/// `host` is expected in the form produced by [`Url::host_str`]: a domain
/// name, a dotted-decimal IPv4 literal, or an IPv6 literal (with or without
/// the surrounding brackets).
///
/// IP literals are parsed and classified numerically rather than by string
/// prefix, so that:
///
/// * ordinary domain names that merely *start* with digits such as
///   `10.example.com` are not rejected by mistake, and
/// * IPv6 spellings of internal addresses (IPv4-mapped `::ffff:127.0.0.1`,
///   unique-local `fc00::/7`, link-local `fe80::/10`) are caught.
///
/// `localhost`, `localhost.` and any `*.localhost` name are treated as
/// private. Strings made only of digits and dots that are *not* valid IPv4
/// literals (e.g. `127.1`) still go through the legacy prefix rule so the
/// check keeps failing closed for them.
///
/// This is a purely textual check; no DNS resolution is performed.
fn is_private_host(host: &str) -> bool {
    // IPv6 literals arrive bracketed from `Url::host_str` (e.g. "[::1]").
    let bare = host
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(host);

    if let Ok(ip) = bare.parse::<std::net::IpAddr>() {
        return match ip {
            std::net::IpAddr::V4(v4) => is_private_ipv4(v4),
            std::net::IpAddr::V6(v6) => is_private_ipv6(v6),
        };
    }

    // Not an IP literal. A trailing dot only marks the name as fully
    // qualified, so "localhost." must be treated like "localhost".
    let name = bare.strip_suffix('.').unwrap_or(bare).to_ascii_lowercase();
    if name == "localhost" || name.ends_with(".localhost") {
        return true;
    }

    // Dotted-numeric strings that failed strict parsing (short forms such as
    // "127.1") are not real domain names; keep the conservative prefix rule
    // for them instead of letting them through.
    let dotted_numeric =
        !name.is_empty() && name.bytes().all(|b| b.is_ascii_digit() || b == b'.');
    if !dotted_numeric {
        return false;
    }

    const LEGACY_PREFIXES: [&str; 5] = ["127.", "10.", "192.168.", "169.254.", "0."];
    LEGACY_PREFIXES.iter().any(|prefix| name.starts_with(*prefix)) || is_172_private(&name)
}

/// Classify an IPv4 address as private/reserved (i.e. not a public unicast
/// destination).
///
/// Covers `0.0.0.0/8`, loopback `127.0.0.0/8`, the RFC 1918 private ranges
/// (`10/8`, `172.16/12`, `192.168/16`), link-local `169.254/16`, shared
/// address space `100.64.0.0/10`, multicast `224.0.0.0/4` and the reserved
/// `240.0.0.0/4` block (which includes the broadcast address).
fn is_private_ipv4(ip: std::net::Ipv4Addr) -> bool {
    let octets = ip.octets();
    octets[0] == 0
        || ip.is_loopback()
        || ip.is_private()
        || ip.is_link_local()
        // 100.64.0.0/10: the top two bits of the second octet are `01`.
        || (octets[0] == 100 && (octets[1] & 0xc0) == 64)
        || ip.is_multicast()
        || octets[0] >= 240
}

/// Classify an IPv6 address as private/reserved.
///
/// Covers loopback `::1`, the unspecified address `::`, multicast `ff00::/8`,
/// unique-local `fc00::/7`, link-local `fe80::/10`, and any address that
/// embeds a private IPv4 address (IPv4-mapped or IPv4-compatible form).
fn is_private_ipv6(ip: std::net::Ipv6Addr) -> bool {
    if ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() {
        return true;
    }

    // `::ffff:a.b.c.d` (and the deprecated `::a.b.c.d`) reach the embedded
    // IPv4 destination, so they inherit its classification.
    if let Some(embedded) = ip.to_ipv4() {
        if is_private_ipv4(embedded) {
            return true;
        }
    }

    let first = ip.segments()[0];
    (first & 0xfe00) == 0xfc00 || (first & 0xffc0) == 0xfe80
}

/// Textual check for the `172.16.0.0/12` private range.
///
/// Returns `true` when `host` starts with `172.` and the label that follows
/// parses as a number in `16..=31`. Used only for dotted-numeric strings that
/// could not be parsed as a full IPv4 literal.
fn is_172_private(host: &str) -> bool {
    host.strip_prefix("172.")
        .and_then(|rest| rest.split('.').next())
        .and_then(|label| label.parse::<u8>().ok())
        .map_or(false, |second_octet| (16..=31).contains(&second_octet))
}
71
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` passes validation.
    fn assert_accepted(input: &str) {
        assert!(validate_url(input).is_ok());
    }

    /// Asserts that `input` is rejected, without constraining the error kind.
    fn assert_rejected(input: &str) {
        assert!(validate_url(input).is_err());
    }

    /// Asserts that `input` is rejected with `SecurityError::InvalidScheme`.
    fn assert_invalid_scheme(input: &str) {
        assert!(matches!(
            validate_url(input),
            Err(SecurityError::InvalidScheme(_))
        ));
    }

    /// Asserts that `input` is rejected with `SecurityError::PrivateIp`.
    fn assert_private_ip(input: &str) {
        assert!(matches!(
            validate_url(input),
            Err(SecurityError::PrivateIp(_))
        ));
    }

    // --- accepted URLs -----------------------------------------------------

    #[test]
    fn test_valid_https_url() {
        let outcome = validate_url("https://example.com/page");
        assert!(outcome.is_ok());
        let parsed = outcome.unwrap();
        assert_eq!(parsed.host_str(), Some("example.com"));
    }

    #[test]
    fn test_valid_http_url() {
        assert_accepted("http://example.com");
    }

    // --- scheme allow-list -------------------------------------------------

    #[test]
    fn test_reject_ftp_scheme() {
        assert_invalid_scheme("ftp://example.com/file");
    }

    #[test]
    fn test_reject_file_scheme() {
        assert_invalid_scheme("file:///etc/passwd");
    }

    #[test]
    fn test_reject_javascript_scheme() {
        assert_rejected("javascript:alert(1)");
    }

    // --- private / reserved hosts ------------------------------------------

    #[test]
    fn test_reject_localhost() {
        assert_private_ip("http://localhost:8080/api");
    }

    #[test]
    fn test_reject_127() {
        assert_private_ip("http://127.0.0.1/admin");
    }

    #[test]
    fn test_reject_10_network() {
        assert_private_ip("http://10.0.0.1/internal");
    }

    #[test]
    fn test_reject_192_168() {
        assert_private_ip("http://192.168.1.1/router");
    }

    #[test]
    fn test_reject_172_16() {
        assert_private_ip("http://172.16.0.1/secret");
    }

    #[test]
    fn test_reject_172_31() {
        assert_private_ip("http://172.31.255.255/secret");
    }

    #[test]
    fn test_allow_172_32() {
        // First address block just past the 172.16.0.0/12 private range.
        assert_accepted("http://172.32.0.1/public");
    }

    #[test]
    fn test_reject_link_local() {
        assert_private_ip("http://169.254.169.254/latest/meta-data/");
    }

    #[test]
    fn test_reject_ipv6_loopback() {
        assert_private_ip("http://[::1]/");
    }

    #[test]
    fn test_reject_zero_ip() {
        assert_private_ip("http://0.0.0.0/");
    }

    // --- malformed input and constants ---------------------------------------

    #[test]
    fn test_invalid_url() {
        assert_rejected("not a url at all");
    }

    #[test]
    fn test_constants() {
        assert_eq!(MAX_FETCH_SIZE, 50 * 1024 * 1024);
        assert_eq!(MAX_SAFE_SIZE, 10 * 1024 * 1024);
    }
}