use url::Url;
use crate::error::SxurlError;
/// Parses `url_str` and rebuilds it around a normalized, DNS-validated host.
///
/// The scheme, credentials, port, path, query and fragment are taken from the
/// parsed [`Url`] and copied through unchanged; only the host is replaced by
/// the output of [`normalize_host`]. The rebuilt string is parsed again and
/// that second parse is what gets returned.
///
/// # Errors
///
/// * [`SxurlError::HostNotDns`] if the text after the first `://` is empty or
///   starts with `/`, or if the parsed URL has no (or an empty) host.
/// * [`SxurlError::InvalidScheme`] if the scheme is not `https`, `http` or
///   `ftp`. The host checks run first, so a URL that fails both reports
///   `HostNotDns`.
/// * Any error returned by [`normalize_host`].
/// * Any parse failure from `Url::parse`, converted through `?`.
pub fn normalize_url(url_str: &str) -> Result<Url, SxurlError> {
    // Reject an empty authority up front.
    // NOTE(review): presumably needed because the parser tolerates extra
    // slashes after the scheme for some schemes — confirm against the url crate.
    if let Some((_, after_scheme)) = url_str.split_once("://") {
        if after_scheme.is_empty() || after_scheme.starts_with('/') {
            return Err(SxurlError::HostNotDns);
        }
    }

    let url = Url::parse(url_str)?;

    let host_str = match url.host_str() {
        Some(host) if !host.is_empty() => host,
        _ => return Err(SxurlError::HostNotDns),
    };

    if !matches!(url.scheme(), "https" | "http" | "ftp") {
        return Err(SxurlError::InvalidScheme);
    }

    let normalized_host = normalize_host(host_str)?;

    let mut reconstructed = String::with_capacity(url_str.len());
    reconstructed.push_str(url.scheme());
    reconstructed.push_str("://");

    // Emit userinfo when either part is present. A URL may carry a password
    // with an empty username (`ftp://:secret@host`); keying this only on the
    // username would silently drop that password.
    let username = url.username();
    let password = url.password();
    if !username.is_empty() || password.is_some() {
        reconstructed.push_str(username);
        if let Some(password) = password {
            reconstructed.push(':');
            reconstructed.push_str(password);
        }
        reconstructed.push('@');
    }

    reconstructed.push_str(&normalized_host);

    // Only written when the parsed URL reports a port.
    if let Some(port) = url.port() {
        reconstructed.push(':');
        reconstructed.push_str(&port.to_string());
    }

    reconstructed.push_str(url.path());

    if let Some(query) = url.query() {
        reconstructed.push('?');
        reconstructed.push_str(query);
    }

    if let Some(fragment) = url.fragment() {
        reconstructed.push('#');
        reconstructed.push_str(fragment);
    }

    let normalized = Url::parse(&reconstructed)?;
    Ok(normalized)
}
/// Lowercases `host`, converts it to its ASCII form and validates the result.
///
/// # Errors
///
/// Returns [`SxurlError::HostNotDns`] when `idna::domain_to_ascii` rejects the
/// lowercased host, and otherwise forwards whatever [`validate_host`] reports
/// for the converted name.
pub fn normalize_host(host: &str) -> Result<String, SxurlError> {
    let lowered = host.to_lowercase();
    let converted = idna::domain_to_ascii(&lowered).map_err(|_| SxurlError::HostNotDns)?;
    validate_host(&converted)?;
    Ok(converted)
}
pub fn validate_host(host: &str) -> Result<(), SxurlError> {
if host.len() > 255 {
return Err(SxurlError::HostTooLong);
}
if host.is_empty() {
return Err(SxurlError::InvalidLabel("Empty hostname".to_string()));
}
if host.parse::<std::net::IpAddr>().is_ok() {
return Err(SxurlError::HostNotDns);
}
if host.contains(':') {
return Err(SxurlError::HostNotDns);
}
let labels: Vec<&str> = host.split('.').collect();
for label in &labels {
validate_dns_label(label)?;
}
if labels.is_empty() {
return Err(SxurlError::InvalidLabel("No labels in hostname".to_string()));
}
Ok(())
}
/// Validates a single DNS label.
///
/// A label is accepted when it is 1 to 63 bytes long, consists only of ASCII
/// letters, ASCII digits and `-`, and neither starts nor ends with `-`.
///
/// # Errors
///
/// Checks run in this order and the first failure is returned:
///
/// 1. [`SxurlError::InvalidLabel`] for an empty label.
/// 2. [`SxurlError::InvalidLabel`] for a label longer than 63 bytes.
/// 3. [`SxurlError::InvalidCharacter`] for any other character.
/// 4. [`SxurlError::InvalidLabel`] for a leading or trailing hyphen.
pub fn validate_dns_label(label: &str) -> Result<(), SxurlError> {
    let bytes = label.as_bytes();

    match bytes.len() {
        0 => return Err(SxurlError::InvalidLabel("Empty label".to_string())),
        n if n > 63 => {
            return Err(SxurlError::InvalidLabel(format!("Label too long: {}", n)));
        }
        _ => {}
    }

    // Every byte of a non-ASCII character is >= 0x80, so a byte-wise scan
    // rejects exactly the same labels as a char-wise one.
    let has_forbidden_byte = bytes
        .iter()
        .any(|&b| !(b.is_ascii_alphanumeric() || b == b'-'));
    if has_forbidden_byte {
        return Err(SxurlError::InvalidCharacter);
    }

    let hyphen = Some(&b'-');
    if bytes.first() == hyphen || bytes.last() == hyphen {
        return Err(SxurlError::InvalidLabel(
            "Label cannot start or end with hyphen".to_string(),
        ));
    }

    Ok(())
}
/// Returns owned copies of the URL's path, query and fragment, in that order.
///
/// A missing query or fragment is returned as an empty string. This function
/// always returns `Ok`.
pub fn extract_raw_components(url: &Url) -> Result<(String, String, String), SxurlError> {
    // Absent optional parts collapse to an empty string.
    let or_empty = |part: Option<&str>| part.map(str::to_owned).unwrap_or_default();

    Ok((
        url.path().to_owned(),
        or_empty(url.query()),
        or_empty(url.fragment()),
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scheme and host come back lowercased; the path keeps its case.
    #[test]
    fn test_normalize_basic_url() {
        let normalized = normalize_url("HTTPS://EXAMPLE.COM/PATH").unwrap();
        assert_eq!(normalized.scheme(), "https");
        assert_eq!(normalized.host_str(), Some("example.com"));
        assert_eq!(normalized.path(), "/PATH");
    }

    /// Schemes other than http, https and ftp are rejected.
    #[test]
    fn test_invalid_scheme() {
        let outcome = normalize_url("ws://example.com");
        assert!(matches!(outcome, Err(SxurlError::InvalidScheme)));
    }

    /// Host normalization lowercases its input.
    #[test]
    fn test_host_normalization() {
        let cases = [
            ("EXAMPLE.COM", "example.com"),
            ("Test-Site.ORG", "test-site.org"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(normalize_host(input).unwrap(), *expected);
        }
    }

    /// Well-formed hostnames pass; empty, oversized and hyphen-edged ones fail.
    #[test]
    fn test_host_validation() {
        let accepted = ["example.com", "test-site.org", "a.b.c.d.e"];
        for host in accepted.iter() {
            assert!(validate_host(host).is_ok(), "expected {:?} to be accepted", host);
        }

        let oversized = "a".repeat(256);
        let rejected = ["", oversized.as_str(), "-invalid.com", "invalid-.com"];
        for host in rejected.iter() {
            assert!(validate_host(host).is_err(), "expected {:?} to be rejected", host);
        }
    }

    /// Labels allow letters, digits and inner hyphens, up to 63 bytes.
    #[test]
    fn test_dns_label_validation() {
        let accepted = ["example", "test-site", "a1b2c3"];
        for label in accepted.iter() {
            assert!(
                validate_dns_label(label).is_ok(),
                "expected {:?} to be accepted",
                label
            );
        }

        let oversized = "a".repeat(64);
        let rejected = [
            "",
            oversized.as_str(),
            "-invalid",
            "invalid-",
            "test.label",
            "test_label",
        ];
        for label in rejected.iter() {
            assert!(
                validate_dns_label(label).is_err(),
                "expected {:?} to be rejected",
                label
            );
        }
    }

    /// Path, query and fragment are returned as separate owned strings.
    #[test]
    fn test_extract_raw_components() {
        let parsed = Url::parse("https://example.com/path?query=value#fragment").unwrap();
        let components = extract_raw_components(&parsed).unwrap();
        assert_eq!(
            components,
            (
                "/path".to_string(),
                "query=value".to_string(),
                "fragment".to_string()
            )
        );
    }

    /// A missing query or fragment comes back as an empty string.
    #[test]
    fn test_extract_components_with_empty_parts() {
        let parsed = Url::parse("https://example.com/").unwrap();
        let components = extract_raw_components(&parsed).unwrap();
        assert_eq!(components, ("/".to_string(), String::new(), String::new()));
    }

    /// Percent-encoding in path, query and fragment survives normalization.
    #[test]
    fn test_normalize_preserves_raw_components() {
        let input =
            "https://example.com/Path%20With%20Spaces?query=value%20with%20spaces#frag%20ment";
        let normalized = normalize_url(input).unwrap();
        assert_eq!(normalized.path(), "/Path%20With%20Spaces");
        assert_eq!(normalized.query(), Some("query=value%20with%20spaces"));
        assert_eq!(normalized.fragment(), Some("frag%20ment"));
    }
}