feedparser_rs/http/
validation.rs1use crate::error::{FeedError, Result};
2use std::net::{Ipv4Addr, Ipv6Addr};
3use url::Url;
4
/// Host names that designate the local machine.
///
/// `validate_domain` rejects a host whose lowercased form equals any entry.
const LOCALHOST_VARIANTS: &[&str] = &[
    // Textual names.
    "localhost",
    "localhost.localdomain",
    // Loopback literals in textual form, in case one ever arrives as a domain string.
    "127.0.0.1",
    "::1",
    "[::1]",
];
13
/// Domain suffixes treated as internal-only names.
///
/// Every entry starts with a dot; `validate_domain` rejects any host whose
/// lowercased form ends with one of these suffixes.
const INTERNAL_TLDS: &[&str] = &[
    // Local-machine / local-network style suffixes.
    ".local",
    ".localhost",
    // Organisation-internal style suffixes.
    ".internal",
    ".intranet",
    ".corp",
    // Home / LAN style suffixes.
    ".home",
    ".lan",
];
24
/// Host names of cloud instance-metadata services.
///
/// `validate_domain` rejects a host whose lowercased form equals any entry.
/// Metadata services hand out instance credentials, which makes them a prime
/// target for server-side request forgery.
const METADATA_DOMAINS: &[&str] = &[
    // Google Cloud metadata server and its documented alias.
    "metadata.google.internal",
    "metadata.goog",
    // Link-local metadata IP in textual form (also blocked by the IPv4 rules).
    "169.254.169.254",
    // Bare short name.
    "metadata",
    "metadata.azure.com",
];
32
33pub fn validate_url(url_str: &str) -> Result<Url> {
70 let url = Url::parse(url_str).map_err(|e| FeedError::Http {
72 message: format!("Invalid URL: {e}"),
73 })?;
74
75 match url.scheme() {
77 "http" | "https" => {}
78 scheme => {
79 return Err(FeedError::Http {
80 message: format!(
81 "Unsupported URL scheme '{scheme}': only 'http' and 'https' are allowed"
82 ),
83 });
84 }
85 }
86
87 let host = url.host().ok_or_else(|| FeedError::Http {
89 message: "URL must have a host".to_string(),
90 })?;
91
92 match host {
94 url::Host::Ipv4(ip) => {
95 validate_ipv4(ip)?;
96 }
97 url::Host::Ipv6(ip) => {
98 validate_ipv6(ip)?;
99 }
100 url::Host::Domain(domain) => {
101 validate_domain(domain)?;
102 }
103 }
104
105 Ok(url)
106}
107
108fn validate_ipv4(ip: Ipv4Addr) -> Result<()> {
110 if ip.is_private() {
111 return Err(FeedError::Http {
112 message: format!("Private IP address not allowed: {ip} (RFC 1918)"),
113 });
114 }
115
116 if ip.is_loopback() {
117 return Err(FeedError::Http {
118 message: format!("Loopback address not allowed: {ip}"),
119 });
120 }
121
122 if ip.is_link_local() {
123 return Err(FeedError::Http {
124 message: format!("Link-local address not allowed: {ip} (169.254.0.0/16)"),
125 });
126 }
127
128 if ip.is_broadcast() {
129 return Err(FeedError::Http {
130 message: format!("Broadcast address not allowed: {ip}"),
131 });
132 }
133
134 if ip.is_documentation() {
135 return Err(FeedError::Http {
136 message: format!("Documentation IP not allowed: {ip} (RFC 5737)"),
137 });
138 }
139
140 let octets = ip.octets();
142 if octets[0] == 169 && octets[1] == 254 && octets[2] == 169 && octets[3] == 254 {
143 return Err(FeedError::Http {
144 message: "AWS metadata endpoint blocked: 169.254.169.254".to_string(),
145 });
146 }
147
148 if octets[0] == 100 && (octets[1] & 0xC0) == 64 {
150 return Err(FeedError::Http {
151 message: format!("Carrier-grade NAT address not allowed: {ip} (100.64.0.0/10)"),
152 });
153 }
154
155 if octets[0] == 0 {
157 return Err(FeedError::Http {
158 message: format!("0.0.0.0/8 range not allowed: {ip}"),
159 });
160 }
161
162 Ok(())
163}
164
165fn validate_ipv6(ip: Ipv6Addr) -> Result<()> {
167 if ip.is_loopback() {
168 return Err(FeedError::Http {
169 message: format!("IPv6 loopback address not allowed: {ip}"),
170 });
171 }
172
173 if ip.is_unicast_link_local() {
174 return Err(FeedError::Http {
175 message: format!("IPv6 link-local address not allowed: {ip} (fe80::/10)"),
176 });
177 }
178
179 let segments = ip.segments();
181 if (segments[0] & 0xFE00) == 0xFC00 {
182 return Err(FeedError::Http {
183 message: format!("IPv6 unique local address not allowed: {ip} (fc00::/7)"),
184 });
185 }
186
187 if ip.is_multicast() {
189 return Err(FeedError::Http {
190 message: format!("IPv6 multicast address not allowed: {ip} (ff00::/8)"),
191 });
192 }
193
194 Ok(())
195}
196
197fn validate_domain(domain: &str) -> Result<()> {
199 let domain_lower = domain.to_lowercase();
200
201 if LOCALHOST_VARIANTS.contains(&domain_lower.as_str()) {
203 return Err(FeedError::Http {
204 message: format!("Localhost domain not allowed: {domain}"),
205 });
206 }
207
208 for tld in INTERNAL_TLDS {
210 if domain_lower.ends_with(tld) {
211 return Err(FeedError::Http {
212 message: format!("Internal domain TLD not allowed: {domain}"),
213 });
214 }
215 }
216
217 if METADATA_DOMAINS.contains(&domain_lower.as_str()) {
219 return Err(FeedError::Http {
220 message: format!("Cloud metadata domain not allowed: {domain}"),
221 });
222 }
223
224 Ok(())
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `validate_url` accepts `url`.
    #[track_caller]
    fn assert_accepted(url: &str) {
        assert!(validate_url(url).is_ok());
    }

    /// Asserts that `validate_url` rejects `url`.
    #[track_caller]
    fn assert_rejected(url: &str) {
        assert!(validate_url(url).is_err());
    }

    // ----- URLs that must be accepted -----

    #[test]
    fn test_valid_http_url() {
        assert_accepted("http://example.com/feed.xml");
    }

    #[test]
    fn test_valid_https_url() {
        assert_accepted("https://blog.example.org/rss");
    }

    #[test]
    fn test_valid_with_port() {
        assert_accepted("https://example.com:8443/feed");
    }

    #[test]
    fn test_valid_with_path() {
        assert_accepted("https://example.com/path/to/feed.xml");
    }

    // ----- Schemes other than http/https -----

    #[test]
    fn test_reject_file_scheme() {
        assert_rejected("file:///etc/passwd");
    }

    #[test]
    fn test_reject_ftp_scheme() {
        assert_rejected("ftp://example.com/file");
    }

    #[test]
    fn test_reject_javascript_scheme() {
        assert_rejected("javascript:alert(1)");
    }

    #[test]
    fn test_reject_data_scheme() {
        assert_rejected("data:text/html,<script>alert(1)</script>");
    }

    // ----- IPv4 hosts -----

    #[test]
    fn test_reject_ipv4_private_10() {
        for url in ["http://10.0.0.1/", "http://10.255.255.255/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_reject_ipv4_private_172() {
        for url in ["http://172.16.0.1/", "http://172.31.255.255/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_reject_ipv4_private_192() {
        for url in ["http://192.168.0.1/", "http://192.168.255.255/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_reject_ipv4_localhost() {
        for url in ["http://127.0.0.1/", "http://127.0.0.2/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_reject_ipv4_link_local() {
        for url in ["http://169.254.169.254/", "http://169.254.0.1/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_reject_ipv4_zero() {
        assert_rejected("http://0.0.0.0/");
    }

    #[test]
    fn test_reject_ipv4_broadcast() {
        assert_rejected("http://255.255.255.255/");
    }

    // ----- IPv6 hosts -----

    #[test]
    fn test_reject_ipv6_loopback() {
        assert_rejected("http://[::1]/");
    }

    #[test]
    fn test_reject_ipv6_link_local() {
        assert_rejected("http://[fe80::1]/");
    }

    #[test]
    fn test_reject_ipv6_unique_local() {
        for url in ["http://[fc00::1]/", "http://[fd00::1]/"] {
            assert_rejected(url);
        }
    }

    // ----- Domain hosts -----

    #[test]
    fn test_reject_localhost_domain() {
        assert_rejected("http://localhost/");
    }

    #[test]
    fn test_reject_local_tld() {
        assert_rejected("http://myserver.local/");
    }

    #[test]
    fn test_reject_internal_tld() {
        assert_rejected("http://server.internal/");
    }

    #[test]
    fn test_reject_cloud_metadata() {
        for url in [
            "http://metadata.google.internal/",
            "http://metadata.azure.com/",
        ] {
            assert_rejected(url);
        }
    }

    // ----- Malformed input and remaining ranges -----

    #[test]
    fn test_reject_no_host() {
        assert_rejected("http://");
    }

    #[test]
    fn test_reject_invalid_url() {
        assert_rejected("not a url");
    }

    #[test]
    fn test_public_ip_allowed() {
        for url in ["http://8.8.8.8/", "http://1.1.1.1/"] {
            assert_accepted(url);
        }
    }

    #[test]
    fn test_carrier_grade_nat_blocked() {
        for url in ["http://100.64.0.1/", "http://100.127.255.255/"] {
            assert_rejected(url);
        }
    }

    #[test]
    fn test_ipv6_multicast_blocked() {
        for url in ["http://[ff00::1]/", "http://[ff02::1]/"] {
            assert_rejected(url);
        }
    }
}