use crate::error::{FeedError, Result};
use std::net::{Ipv4Addr, Ipv6Addr};
use url::Url;
const LOCALHOST_VARIANTS: &[&str] = &[
"localhost",
"localhost.localdomain",
"127.0.0.1",
"::1",
"[::1]",
];
const INTERNAL_TLDS: &[&str] = &[
".local",
".localhost",
".internal",
".intranet",
".corp",
".home",
".lan",
];
const METADATA_DOMAINS: &[&str] = &[
"metadata.google.internal",
"169.254.169.254",
"metadata",
"metadata.azure.com",
];
pub fn validate_url(url_str: &str) -> Result<Url> {
let url = Url::parse(url_str).map_err(|e| FeedError::Http {
message: format!("Invalid URL: {e}"),
})?;
match url.scheme() {
"http" | "https" => {}
scheme => {
return Err(FeedError::Http {
message: format!(
"Unsupported URL scheme '{scheme}': only 'http' and 'https' are allowed"
),
});
}
}
let host = url.host().ok_or_else(|| FeedError::Http {
message: "URL must have a host".to_string(),
})?;
match host {
url::Host::Ipv4(ip) => {
validate_ipv4(ip)?;
}
url::Host::Ipv6(ip) => {
validate_ipv6(ip)?;
}
url::Host::Domain(domain) => {
validate_domain(domain)?;
}
}
Ok(url)
}
fn validate_ipv4(ip: Ipv4Addr) -> Result<()> {
if ip.is_private() {
return Err(FeedError::Http {
message: format!("Private IP address not allowed: {ip} (RFC 1918)"),
});
}
if ip.is_loopback() {
return Err(FeedError::Http {
message: format!("Loopback address not allowed: {ip}"),
});
}
if ip.is_link_local() {
return Err(FeedError::Http {
message: format!("Link-local address not allowed: {ip} (169.254.0.0/16)"),
});
}
if ip.is_broadcast() {
return Err(FeedError::Http {
message: format!("Broadcast address not allowed: {ip}"),
});
}
if ip.is_documentation() {
return Err(FeedError::Http {
message: format!("Documentation IP not allowed: {ip} (RFC 5737)"),
});
}
let octets = ip.octets();
if octets[0] == 169 && octets[1] == 254 && octets[2] == 169 && octets[3] == 254 {
return Err(FeedError::Http {
message: "AWS metadata endpoint blocked: 169.254.169.254".to_string(),
});
}
if octets[0] == 100 && (octets[1] & 0xC0) == 64 {
return Err(FeedError::Http {
message: format!("Carrier-grade NAT address not allowed: {ip} (100.64.0.0/10)"),
});
}
if octets[0] == 0 {
return Err(FeedError::Http {
message: format!("0.0.0.0/8 range not allowed: {ip}"),
});
}
Ok(())
}
fn validate_ipv6(ip: Ipv6Addr) -> Result<()> {
if ip.is_loopback() {
return Err(FeedError::Http {
message: format!("IPv6 loopback address not allowed: {ip}"),
});
}
if ip.is_unicast_link_local() {
return Err(FeedError::Http {
message: format!("IPv6 link-local address not allowed: {ip} (fe80::/10)"),
});
}
let segments = ip.segments();
if (segments[0] & 0xFE00) == 0xFC00 {
return Err(FeedError::Http {
message: format!("IPv6 unique local address not allowed: {ip} (fc00::/7)"),
});
}
if ip.is_multicast() {
return Err(FeedError::Http {
message: format!("IPv6 multicast address not allowed: {ip} (ff00::/8)"),
});
}
Ok(())
}
fn validate_domain(domain: &str) -> Result<()> {
let domain_lower = domain.to_lowercase();
if LOCALHOST_VARIANTS.contains(&domain_lower.as_str()) {
return Err(FeedError::Http {
message: format!("Localhost domain not allowed: {domain}"),
});
}
for tld in INTERNAL_TLDS {
if domain_lower.ends_with(tld) {
return Err(FeedError::Http {
message: format!("Internal domain TLD not allowed: {domain}"),
});
}
}
if METADATA_DOMAINS.contains(&domain_lower.as_str()) {
return Err(FeedError::Http {
message: format!("Cloud metadata domain not allowed: {domain}"),
});
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_valid_http_url() {
assert!(validate_url("http://example.com/feed.xml").is_ok());
}
#[test]
fn test_valid_https_url() {
assert!(validate_url("https://blog.example.org/rss").is_ok());
}
#[test]
fn test_valid_with_port() {
assert!(validate_url("https://example.com:8443/feed").is_ok());
}
#[test]
fn test_valid_with_path() {
assert!(validate_url("https://example.com/path/to/feed.xml").is_ok());
}
#[test]
fn test_reject_file_scheme() {
assert!(validate_url("file:///etc/passwd").is_err());
}
#[test]
fn test_reject_ftp_scheme() {
assert!(validate_url("ftp://example.com/file").is_err());
}
#[test]
fn test_reject_javascript_scheme() {
assert!(validate_url("javascript:alert(1)").is_err());
}
#[test]
fn test_reject_data_scheme() {
assert!(validate_url("data:text/html,<script>alert(1)</script>").is_err());
}
#[test]
fn test_reject_ipv4_private_10() {
assert!(validate_url("http://10.0.0.1/").is_err());
assert!(validate_url("http://10.255.255.255/").is_err());
}
#[test]
fn test_reject_ipv4_private_172() {
assert!(validate_url("http://172.16.0.1/").is_err());
assert!(validate_url("http://172.31.255.255/").is_err());
}
#[test]
fn test_reject_ipv4_private_192() {
assert!(validate_url("http://192.168.0.1/").is_err());
assert!(validate_url("http://192.168.255.255/").is_err());
}
#[test]
fn test_reject_ipv4_localhost() {
assert!(validate_url("http://127.0.0.1/").is_err());
assert!(validate_url("http://127.0.0.2/").is_err());
}
#[test]
fn test_reject_ipv4_link_local() {
assert!(validate_url("http://169.254.169.254/").is_err());
assert!(validate_url("http://169.254.0.1/").is_err());
}
#[test]
fn test_reject_ipv4_zero() {
assert!(validate_url("http://0.0.0.0/").is_err());
}
#[test]
fn test_reject_ipv4_broadcast() {
assert!(validate_url("http://255.255.255.255/").is_err());
}
#[test]
fn test_reject_ipv6_loopback() {
assert!(validate_url("http://[::1]/").is_err());
}
#[test]
fn test_reject_ipv6_link_local() {
assert!(validate_url("http://[fe80::1]/").is_err());
}
#[test]
fn test_reject_ipv6_unique_local() {
assert!(validate_url("http://[fc00::1]/").is_err());
assert!(validate_url("http://[fd00::1]/").is_err());
}
#[test]
fn test_reject_localhost_domain() {
assert!(validate_url("http://localhost/").is_err());
}
#[test]
fn test_reject_local_tld() {
assert!(validate_url("http://myserver.local/").is_err());
}
#[test]
fn test_reject_internal_tld() {
assert!(validate_url("http://server.internal/").is_err());
}
#[test]
fn test_reject_cloud_metadata() {
assert!(validate_url("http://metadata.google.internal/").is_err());
assert!(validate_url("http://metadata.azure.com/").is_err());
}
#[test]
fn test_reject_no_host() {
assert!(validate_url("http://").is_err());
}
#[test]
fn test_reject_invalid_url() {
assert!(validate_url("not a url").is_err());
}
#[test]
fn test_public_ip_allowed() {
assert!(validate_url("http://8.8.8.8/").is_ok());
assert!(validate_url("http://1.1.1.1/").is_ok());
}
#[test]
fn test_carrier_grade_nat_blocked() {
assert!(validate_url("http://100.64.0.1/").is_err());
assert!(validate_url("http://100.127.255.255/").is_err());
}
#[test]
fn test_ipv6_multicast_blocked() {
assert!(validate_url("http://[ff00::1]/").is_err());
assert!(validate_url("http://[ff02::1]/").is_err());
}
}