graphify_security/
url_validator.rs1use url::Url;
4
5use crate::SecurityError;
6
/// Larger of the two size limits: 50 MiB, expressed in bytes.
///
/// NOTE(review): presumably the hard cap applied to a fetched body; the
/// consumers are not visible in this file, so confirm against callers.
pub const MAX_FETCH_SIZE: usize = 50 * (1 << 20);

/// Smaller of the two size limits: 10 MiB, expressed in bytes.
///
/// NOTE(review): presumably the threshold below which content is handled
/// without extra precautions; confirm against callers.
pub const MAX_SAFE_SIZE: usize = 10 * (1 << 20);
12
13pub fn validate_url(url_str: &str) -> Result<Url, SecurityError> {
19 let url = Url::parse(url_str)?;
20
21 if url.scheme() != "http" && url.scheme() != "https" {
22 return Err(SecurityError::InvalidScheme(url.scheme().to_string()));
23 }
24
25 if let Some(host) = url.host_str() {
26 if is_private_host(host) {
27 return Err(SecurityError::PrivateIp(host.to_string()));
28 }
29 } else {
30 return Err(SecurityError::PrivateIp("(no host)".to_string()));
31 }
32
33 Ok(url)
34}
35
36fn is_private_host(host: &str) -> bool {
38 if host == "localhost" {
39 return true;
40 }
41
42 if let Ok(ip) = host
43 .trim_start_matches('[')
44 .trim_end_matches(']')
45 .parse::<std::net::IpAddr>()
46 {
47 return ip_is_private(&ip);
48 }
49
50 if let Some(ipv4) = parse_nonstandard_ipv4(host) {
51 return ip_is_private(&std::net::IpAddr::V4(ipv4));
52 }
53
54 false
55}
56
57fn ip_is_private(ip: &std::net::IpAddr) -> bool {
59 match ip {
60 std::net::IpAddr::V4(v4) => {
61 v4.is_loopback()
62 || v4.is_private()
63 || v4.is_link_local()
64 || v4.is_unspecified()
65 || is_in_range(
66 v4,
67 &std::net::Ipv4Addr::new(100, 64, 0, 0),
68 &std::net::Ipv4Addr::new(100, 127, 255, 255),
69 )
70 || is_in_range(
71 v4,
72 &std::net::Ipv4Addr::new(198, 18, 0, 0),
73 &std::net::Ipv4Addr::new(198, 19, 255, 255),
74 )
75 }
76 std::net::IpAddr::V6(v6) => {
77 v6.is_loopback()
78 || v6.is_unspecified()
79 || matches!(v6.octets()[0] & 0xfe, 0xfc)
80 || matches!(v6.octets()[0], 0xfe) && matches!(v6.octets()[1] & 0xc0, 0x80)
81 }
82 }
83}
84
/// Reports whether `ip` lies within the inclusive interval `start..=end`,
/// comparing the addresses by their 32-bit numeric value.
///
/// An interval whose `start` is greater than its `end` contains nothing.
fn is_in_range(
    ip: &std::net::Ipv4Addr,
    start: &std::net::Ipv4Addr,
    end: &std::net::Ipv4Addr,
) -> bool {
    let lower = u32::from(*start);
    let upper = u32::from(*end);
    (lower..=upper).contains(&u32::from(*ip))
}
93
/// Interprets `host` as a legacy (`inet_aton`-style) numeric IPv4 address.
///
/// Accepted spellings consist of one to four dot-separated fields, each
/// written in decimal (`127`), octal with a leading `0` (`0177`) or
/// hexadecimal with a `0x`/`0X` prefix (`0x7f`). Every field but the last
/// fills one byte; the last field fills all remaining bytes. Examples that
/// all yield `127.0.0.1`: `2130706433`, `0x7f000001`, `017700000001`,
/// `127.1`, `0x7f.0.0.1`, `0177.0.0.1`. One trailing dot is ignored.
///
/// Returns `None` when any field is empty, contains a character that is not
/// a digit of its radix (including a sign), or does not fit in the bytes
/// available to it, and when there are more than four fields.
fn parse_nonstandard_ipv4(host: &str) -> Option<std::net::Ipv4Addr> {
    // Parses one field, choosing the radix from its prefix.
    fn parse_field(field: &str) -> Option<u32> {
        let (digits, radix) = if let Some(hex) = field
            .strip_prefix("0x")
            .or_else(|| field.strip_prefix("0X"))
        {
            (hex, 16)
        } else if let Some(octal) = field.strip_prefix('0').filter(|rest| !rest.is_empty()) {
            (octal, 8)
        } else {
            (field, 10)
        };

        // `from_str_radix` tolerates a leading '+', which is not part of any
        // address syntax, so insist on digits only.
        if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
            return None;
        }
        u32::from_str_radix(digits, radix).ok()
    }

    let host = host.strip_suffix('.').unwrap_or(host);

    let mut fields: Vec<u32> = Vec::with_capacity(4);
    for field in host.split('.') {
        if fields.len() == 4 {
            return None;
        }
        fields.push(parse_field(field)?);
    }

    // `split` yields at least one item, so there is always a last field.
    let (&tail, leading) = fields.split_last()?;

    if leading.iter().any(|&byte| byte > 0xff) {
        return None;
    }

    // The last field must fit into the bytes not taken by the leading ones.
    let tail_limit: u64 = match leading.len() {
        0 => 1u64 << 32,
        1 => 1u64 << 24,
        2 => 1u64 << 16,
        _ => 1u64 << 8,
    };
    if u64::from(tail) >= tail_limit {
        return None;
    }

    let mut value = tail;
    let mut shift = 24u32;
    for &byte in leading {
        value |= byte << shift;
        // At most three leading fields, so this never underflows.
        shift -= 8;
    }

    Some(std::net::Ipv4Addr::from(value))
}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that validating `input` fails with `SecurityError::PrivateIp`.
    #[track_caller]
    fn assert_private(input: &str) {
        let outcome = validate_url(input);
        assert!(matches!(outcome, Err(SecurityError::PrivateIp(_))));
    }

    /// Asserts that validating `input` fails with
    /// `SecurityError::InvalidScheme`.
    #[track_caller]
    fn assert_bad_scheme(input: &str) {
        let outcome = validate_url(input);
        assert!(matches!(outcome, Err(SecurityError::InvalidScheme(_))));
    }

    // --- accepted URLs -----------------------------------------------------

    #[test]
    fn test_valid_https_url() {
        let outcome = validate_url("https://example.com/page");
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().host_str(), Some("example.com"));
    }

    #[test]
    fn test_valid_http_url() {
        assert!(validate_url("http://example.com").is_ok());
    }

    // --- scheme filtering --------------------------------------------------

    #[test]
    fn test_reject_ftp_scheme() {
        assert_bad_scheme("ftp://example.com/file");
    }

    #[test]
    fn test_reject_file_scheme() {
        assert_bad_scheme("file:///etc/passwd");
    }

    #[test]
    fn test_reject_javascript_scheme() {
        // Only failure is asserted here, not the specific variant.
        assert!(validate_url("javascript:alert(1)").is_err());
    }

    // --- private / local hosts ---------------------------------------------

    #[test]
    fn test_reject_localhost() {
        assert_private("http://localhost:8080/api");
    }

    #[test]
    fn test_reject_127() {
        assert_private("http://127.0.0.1/admin");
    }

    #[test]
    fn test_reject_10_network() {
        assert_private("http://10.0.0.1/internal");
    }

    #[test]
    fn test_reject_192_168() {
        assert_private("http://192.168.1.1/router");
    }

    #[test]
    fn test_reject_172_16() {
        assert_private("http://172.16.0.1/secret");
    }

    #[test]
    fn test_reject_172_31() {
        assert_private("http://172.31.255.255/secret");
    }

    #[test]
    fn test_allow_172_32() {
        // First address block past the 172.16.0.0/12 private range.
        assert!(validate_url("http://172.32.0.1/public").is_ok());
    }

    #[test]
    fn test_reject_link_local() {
        assert_private("http://169.254.169.254/latest/meta-data/");
    }

    #[test]
    fn test_reject_ipv6_loopback() {
        assert_private("http://[::1]/");
    }

    #[test]
    fn test_reject_zero_ip() {
        assert_private("http://0.0.0.0/");
    }

    // --- miscellaneous -----------------------------------------------------

    #[test]
    fn test_invalid_url() {
        assert!(validate_url("not a url at all").is_err());
    }

    #[test]
    fn test_constants() {
        assert_eq!(MAX_FETCH_SIZE, 50 * 1024 * 1024);
        assert_eq!(MAX_SAFE_SIZE, 10 * 1024 * 1024);
    }
}