use std::net::IpAddr;
use std::sync::LazyLock;
use ipnet::IpNet;
use url::Url;
static DEFAULT_DENY_NETS: LazyLock<Vec<IpNet>> = LazyLock::new(|| {
vec![
"127.0.0.0/8".parse().unwrap(), "10.0.0.0/8".parse().unwrap(), "172.16.0.0/12".parse().unwrap(), "192.168.0.0/16".parse().unwrap(), "169.254.0.0/16".parse().unwrap(), "0.0.0.0/8".parse().unwrap(), "224.0.0.0/4".parse().unwrap(), "::1/128".parse().unwrap(), "fe80::/10".parse().unwrap(), "fc00::/7".parse().unwrap(), "ff00::/8".parse().unwrap(), ]
});
/// Returns `true` when `ip` falls inside any network in `DEFAULT_DENY_NETS`.
///
/// IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) are converted to their IPv4 form
/// before matching. Without this step an address such as `::ffff:127.0.0.1` would
/// slip past every IPv4 range in the list, because an IPv4 network never contains
/// an IPv6 address.
fn is_ip_denied(ip: IpAddr) -> bool {
    // `to_canonical` leaves plain IPv4 and non-mapped IPv6 addresses unchanged.
    let ip = ip.to_canonical();
    DEFAULT_DENY_NETS.iter().any(|net| net.contains(&ip))
}
/// Checks whether `url` is allowed to be fetched.
///
/// Rules, in order:
/// 1. `file` URLs are always accepted.
/// 2. Any scheme other than `http` / `https` is rejected.
/// 3. If the `KREUZCRAWL_ALLOW_PRIVATE_NETWORK` environment variable is set (to any
///    value, including an empty one), host checks are skipped and the URL is accepted.
/// 4. Otherwise, literal IP hosts are checked with `is_ip_denied`, and the domain
///    `localhost` (or any `*.localhost` name) is rejected.
///
/// Domain names are not resolved here; only literal IP hosts are checked against
/// the deny list.
///
/// # Errors
///
/// Returns a human-readable message when the scheme is forbidden, when the host is
/// a denied IP address, or when the host is a localhost name.
pub fn validate_url(url: &Url) -> Result<(), String> {
    let scheme = url.scheme();

    if scheme == "file" {
        return Ok(());
    }

    // The scheme allow-list is enforced before the private-network override: that
    // override is meant to lift host restrictions, not to admit arbitrary schemes.
    if scheme != "http" && scheme != "https" {
        return Err(format!(
            "Forbidden URL scheme '{}' - only http, https, and file are allowed",
            scheme
        ));
    }

    // Only the presence of the variable matters; its value is never inspected.
    if std::env::var_os("KREUZCRAWL_ALLOW_PRIVATE_NETWORK").is_some() {
        return Ok(());
    }

    match url.host() {
        Some(url::Host::Ipv4(ip)) => {
            if is_ip_denied(IpAddr::from(ip)) {
                return Err(format!("Access to private/internal IP address {} is not allowed", ip));
            }
        }
        Some(url::Host::Ipv6(ip)) => {
            if is_ip_denied(IpAddr::from(ip)) {
                return Err(format!("Access to private/internal IPv6 address {} is not allowed", ip));
            }
        }
        Some(url::Host::Domain(domain)) => {
            // Strip trailing dots so the fully-qualified spelling `localhost.` cannot
            // dodge the comparison.
            let name = domain.trim_end_matches('.').to_lowercase();
            if name == "localhost" || name.ends_with(".localhost") {
                return Err(format!("Localhost rebinding attack blocked: {}", domain));
            }
        }
        // No host component: nothing to check.
        None => {}
    }

    Ok(())
}