feedparser_rs/http/
validation.rs

1use crate::error::{FeedError, Result};
2use std::net::{Ipv4Addr, Ipv6Addr};
3use url::Url;
4
// Host names that refer to the local machine and must be rejected.
//
// `validate_domain` matches a host against this list as a whole name (not as
// a suffix), so only these literal spellings are covered by this table.
// NOTE(review): `validate_url` sends hosts it receives as `Host::Ipv4` /
// `Host::Ipv6` to the IP validators, so the IP-literal entries below
// presumably only matter when such text arrives as a domain — confirm.
const LOCALHOST_VARIANTS: &[&str] = &[
    "localhost",
    "localhost.localdomain",
    "127.0.0.1",
    "::1",
    "[::1]",
];
13
// Domain suffixes reserved for, or commonly used by, private networks.
//
// `validate_domain` tests each entry with `ends_with`, and every entry keeps
// its leading dot: `.local` therefore matches `printer.local` but neither
// `notlocal` nor a host named exactly `local`.
const INTERNAL_TLDS: &[&str] = &[
    ".local",
    ".localhost",
    ".internal",
    ".intranet",
    ".corp",
    ".home",
    ".lan",
];
24
// Host names of cloud instance-metadata services that must be rejected.
//
// `validate_domain` matches a host against this list as a whole name, so a
// subdomain of an entry is not covered by this table alone.
const METADATA_DOMAINS: &[&str] = &[
    "metadata.google.internal",
    "169.254.169.254",
    "metadata",
    "metadata.azure.com",
];
32
33/// Validates a URL to prevent Server-Side Request Forgery (SSRF) attacks
34///
35/// This function ensures that URLs only point to public, safe destinations.
36///
37/// # Security Checks
38///
39/// 1. Only HTTP and HTTPS schemes are allowed
40/// 2. Private IP ranges are blocked (RFC 1918, RFC 4193)
41/// 3. Localhost and loopback addresses are blocked
42/// 4. Link-local addresses are blocked (169.254.0.0/16)
43/// 5. Cloud metadata endpoints are blocked
44/// 6. Internal domain names are blocked (.local, .internal)
45///
46/// # Errors
47///
48/// Returns `FeedError::Http` if:
49/// - The URL is malformed or invalid
50/// - The URL scheme is not HTTP or HTTPS
51/// - The URL points to a private IP address, localhost, or internal domain
52/// - The URL points to a cloud metadata endpoint
53///
54/// # Examples
55///
56/// ```
57/// use feedparser_rs::http::validation::validate_url;
58///
59/// // These are allowed
60/// assert!(validate_url("https://example.com/feed.xml").is_ok());
61/// assert!(validate_url("http://blog.example.org/rss").is_ok());
62///
63/// // These are blocked
64/// assert!(validate_url("http://localhost/").is_err());
65/// assert!(validate_url("http://192.168.1.1/").is_err());
66/// assert!(validate_url("http://169.254.169.254/").is_err());
67/// assert!(validate_url("file:///etc/passwd").is_err());
68/// ```
69pub fn validate_url(url_str: &str) -> Result<Url> {
70    // Parse URL
71    let url = Url::parse(url_str).map_err(|e| FeedError::Http {
72        message: format!("Invalid URL: {e}"),
73    })?;
74
75    // Check 1: Only allow HTTP/HTTPS schemes
76    match url.scheme() {
77        "http" | "https" => {}
78        scheme => {
79            return Err(FeedError::Http {
80                message: format!(
81                    "Unsupported URL scheme '{scheme}': only 'http' and 'https' are allowed"
82                ),
83            });
84        }
85    }
86
87    // Check 2: URL must have a host
88    let host = url.host().ok_or_else(|| FeedError::Http {
89        message: "URL must have a host".to_string(),
90    })?;
91
92    // Check 3: Validate host based on type
93    match host {
94        url::Host::Ipv4(ip) => {
95            validate_ipv4(ip)?;
96        }
97        url::Host::Ipv6(ip) => {
98            validate_ipv6(ip)?;
99        }
100        url::Host::Domain(domain) => {
101            validate_domain(domain)?;
102        }
103    }
104
105    Ok(url)
106}
107
108/// Validates an IPv4 address to prevent SSRF
109fn validate_ipv4(ip: Ipv4Addr) -> Result<()> {
110    if ip.is_private() {
111        return Err(FeedError::Http {
112            message: format!("Private IP address not allowed: {ip} (RFC 1918)"),
113        });
114    }
115
116    if ip.is_loopback() {
117        return Err(FeedError::Http {
118            message: format!("Loopback address not allowed: {ip}"),
119        });
120    }
121
122    if ip.is_link_local() {
123        return Err(FeedError::Http {
124            message: format!("Link-local address not allowed: {ip} (169.254.0.0/16)"),
125        });
126    }
127
128    if ip.is_broadcast() {
129        return Err(FeedError::Http {
130            message: format!("Broadcast address not allowed: {ip}"),
131        });
132    }
133
134    if ip.is_documentation() {
135        return Err(FeedError::Http {
136            message: format!("Documentation IP not allowed: {ip} (RFC 5737)"),
137        });
138    }
139
140    // Block cloud metadata endpoints specifically
141    let octets = ip.octets();
142    if octets[0] == 169 && octets[1] == 254 && octets[2] == 169 && octets[3] == 254 {
143        return Err(FeedError::Http {
144            message: "AWS metadata endpoint blocked: 169.254.169.254".to_string(),
145        });
146    }
147
148    // Block carrier-grade NAT (100.64.0.0/10)
149    if octets[0] == 100 && (octets[1] & 0xC0) == 64 {
150        return Err(FeedError::Http {
151            message: format!("Carrier-grade NAT address not allowed: {ip} (100.64.0.0/10)"),
152        });
153    }
154
155    // Block 0.0.0.0/8
156    if octets[0] == 0 {
157        return Err(FeedError::Http {
158            message: format!("0.0.0.0/8 range not allowed: {ip}"),
159        });
160    }
161
162    Ok(())
163}
164
165/// Validates an IPv6 address to prevent SSRF
166fn validate_ipv6(ip: Ipv6Addr) -> Result<()> {
167    if ip.is_loopback() {
168        return Err(FeedError::Http {
169            message: format!("IPv6 loopback address not allowed: {ip}"),
170        });
171    }
172
173    if ip.is_unicast_link_local() {
174        return Err(FeedError::Http {
175            message: format!("IPv6 link-local address not allowed: {ip} (fe80::/10)"),
176        });
177    }
178
179    // Check for Unique Local Addresses (ULA) - fc00::/7
180    let segments = ip.segments();
181    if (segments[0] & 0xFE00) == 0xFC00 {
182        return Err(FeedError::Http {
183            message: format!("IPv6 unique local address not allowed: {ip} (fc00::/7)"),
184        });
185    }
186
187    // Block multicast addresses
188    if ip.is_multicast() {
189        return Err(FeedError::Http {
190            message: format!("IPv6 multicast address not allowed: {ip} (ff00::/8)"),
191        });
192    }
193
194    Ok(())
195}
196
197/// Validates a domain name to prevent SSRF
198fn validate_domain(domain: &str) -> Result<()> {
199    let domain_lower = domain.to_lowercase();
200
201    // Block localhost variations
202    if LOCALHOST_VARIANTS.contains(&domain_lower.as_str()) {
203        return Err(FeedError::Http {
204            message: format!("Localhost domain not allowed: {domain}"),
205        });
206    }
207
208    // Block internal TLDs
209    for tld in INTERNAL_TLDS {
210        if domain_lower.ends_with(tld) {
211            return Err(FeedError::Http {
212                message: format!("Internal domain TLD not allowed: {domain}"),
213            });
214        }
215    }
216
217    // Block cloud metadata endpoints
218    if METADATA_DOMAINS.contains(&domain_lower.as_str()) {
219        return Err(FeedError::Http {
220            message: format!("Cloud metadata domain not allowed: {domain}"),
221        });
222    }
223
224    Ok(())
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every URL in `urls` passes `validate_url`.
    #[track_caller]
    fn expect_allowed(urls: &[&str]) {
        for url in urls {
            assert!(validate_url(url).is_ok());
        }
    }

    /// Asserts that every URL in `urls` is refused by `validate_url`.
    #[track_caller]
    fn expect_rejected(urls: &[&str]) {
        for url in urls {
            assert!(validate_url(url).is_err());
        }
    }

    // ---- Accepted URLs ----

    #[test]
    fn test_valid_http_url() {
        expect_allowed(&["http://example.com/feed.xml"]);
    }

    #[test]
    fn test_valid_https_url() {
        expect_allowed(&["https://blog.example.org/rss"]);
    }

    #[test]
    fn test_valid_with_port() {
        expect_allowed(&["https://example.com:8443/feed"]);
    }

    #[test]
    fn test_valid_with_path() {
        expect_allowed(&["https://example.com/path/to/feed.xml"]);
    }

    // ---- Rejected schemes ----

    #[test]
    fn test_reject_file_scheme() {
        expect_rejected(&["file:///etc/passwd"]);
    }

    #[test]
    fn test_reject_ftp_scheme() {
        expect_rejected(&["ftp://example.com/file"]);
    }

    #[test]
    fn test_reject_javascript_scheme() {
        expect_rejected(&["javascript:alert(1)"]);
    }

    #[test]
    fn test_reject_data_scheme() {
        expect_rejected(&["data:text/html,<script>alert(1)</script>"]);
    }

    // ---- Rejected IPv4 ranges ----

    #[test]
    fn test_reject_ipv4_private_10() {
        expect_rejected(&["http://10.0.0.1/", "http://10.255.255.255/"]);
    }

    #[test]
    fn test_reject_ipv4_private_172() {
        expect_rejected(&["http://172.16.0.1/", "http://172.31.255.255/"]);
    }

    #[test]
    fn test_reject_ipv4_private_192() {
        expect_rejected(&["http://192.168.0.1/", "http://192.168.255.255/"]);
    }

    #[test]
    fn test_reject_ipv4_localhost() {
        expect_rejected(&["http://127.0.0.1/", "http://127.0.0.2/"]);
    }

    #[test]
    fn test_reject_ipv4_link_local() {
        expect_rejected(&["http://169.254.169.254/", "http://169.254.0.1/"]);
    }

    #[test]
    fn test_reject_ipv4_zero() {
        expect_rejected(&["http://0.0.0.0/"]);
    }

    #[test]
    fn test_reject_ipv4_broadcast() {
        expect_rejected(&["http://255.255.255.255/"]);
    }

    // ---- Rejected IPv6 ranges ----

    #[test]
    fn test_reject_ipv6_loopback() {
        expect_rejected(&["http://[::1]/"]);
    }

    #[test]
    fn test_reject_ipv6_link_local() {
        expect_rejected(&["http://[fe80::1]/"]);
    }

    #[test]
    fn test_reject_ipv6_unique_local() {
        expect_rejected(&["http://[fc00::1]/", "http://[fd00::1]/"]);
    }

    // ---- Rejected domain names ----

    #[test]
    fn test_reject_localhost_domain() {
        expect_rejected(&["http://localhost/"]);
    }

    #[test]
    fn test_reject_local_tld() {
        expect_rejected(&["http://myserver.local/"]);
    }

    #[test]
    fn test_reject_internal_tld() {
        expect_rejected(&["http://server.internal/"]);
    }

    #[test]
    fn test_reject_cloud_metadata() {
        expect_rejected(&[
            "http://metadata.google.internal/",
            "http://metadata.azure.com/",
        ]);
    }

    // ---- Edge cases ----

    #[test]
    fn test_reject_no_host() {
        expect_rejected(&["http://"]);
    }

    #[test]
    fn test_reject_invalid_url() {
        expect_rejected(&["not a url"]);
    }

    #[test]
    fn test_public_ip_allowed() {
        // Publicly routable literals must not be caught by the range checks.
        expect_allowed(&["http://8.8.8.8/", "http://1.1.1.1/"]);
    }

    #[test]
    fn test_carrier_grade_nat_blocked() {
        expect_rejected(&["http://100.64.0.1/", "http://100.127.255.255/"]);
    }

    #[test]
    fn test_ipv6_multicast_blocked() {
        expect_rejected(&["http://[ff00::1]/", "http://[ff02::1]/"]);
    }
}