use percent_encoding::percent_decode_str;
/// The components of an absolute URL, as produced by [`ParsedUrl::parse`].
#[derive(Debug)]
pub(crate) struct ParsedUrl {
    /// URL scheme; the parser stores it lowercased.
    pub scheme: String,
    /// Percent-decoded user name; empty when the URL carries no user information.
    pub username: String,
    /// Percent-decoded password; `None` when absent or empty.
    pub password: Option<String>,
    /// Host portion of the authority (bracketed hosts keep their brackets);
    /// `None` when the authority is empty.
    pub host: Option<String>,
    /// Explicit port, if one was given; the parser never yields `0`.
    pub port: Option<u16>,
    /// Percent-decoded path starting at the first `/` after the authority;
    /// empty when the URL has no path.
    pub path: String,
}
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum UrlParseError {
#[error("relative URL without a base")]
RelativeUrlWithoutBase,
#[error("invalid port number - must be between 1-65535")]
InvalidPort,
#[error("invalid domain character")]
InvalidDomainCharacter,
#[error("Scheme requires host")]
SchemeRequiresHost,
}
/// Returns `true` if `c` may appear in a URL scheme: an ASCII letter, an ASCII
/// digit, or one of `+`, `-`, `.`.
fn is_valid_scheme_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')
}
/// Percent-decodes `s` into an owned string.
///
/// # Errors
///
/// Returns [`UrlParseError::InvalidDomainCharacter`] if the decoded bytes are not
/// valid UTF-8.
fn percent_decode(s: &str) -> Result<String, UrlParseError> {
    match percent_decode_str(s).decode_utf8() {
        Ok(decoded) => Ok(decoded.into_owned()),
        Err(_) => Err(UrlParseError::InvalidDomainCharacter),
    }
}
impl ParsedUrl {
    /// Parses `input` as an absolute URL of the form
    /// `scheme://[user[:password]@][host[:port]][/path]`.
    ///
    /// The scheme is lowercased; user name, password and path are percent-decoded.
    /// An empty password (`user:@host`) is reported as `None`.
    ///
    /// # Errors
    ///
    /// * [`UrlParseError::RelativeUrlWithoutBase`] if there is no `:`, the scheme is not
    ///   followed by `://`, or the scheme is empty or contains invalid characters.
    /// * [`UrlParseError::InvalidDomainCharacter`] if `input` contains whitespace, a
    ///   percent-encoded component does not decode to valid UTF-8, the host is
    ///   malformed, or user information is given without a host.
    /// * [`UrlParseError::InvalidPort`] if a port is present but is zero or does not fit
    ///   in 16 bits, or a bracketed host is followed by a non-numeric port.
    /// * [`UrlParseError::SchemeRequiresHost`] if the scheme is one of `http`, `https`,
    ///   `git`, `ssh`, `ftp` or `ftps` and no host is present.
    pub(crate) fn parse(input: &str) -> Result<Self, UrlParseError> {
        // Whitespace is rejected anywhere in the input, before any splitting happens.
        if input.chars().any(char::is_whitespace) {
            return Err(UrlParseError::InvalidDomainCharacter);
        }

        let first_colon = input.find(':').ok_or(UrlParseError::RelativeUrlWithoutBase)?;
        let scheme_str = &input[..first_colon];
        let Some(after_scheme) = input[first_colon..].strip_prefix("://") else {
            return Err(UrlParseError::RelativeUrlWithoutBase);
        };
        if scheme_str.is_empty() || !scheme_str.chars().all(is_valid_scheme_char) {
            return Err(UrlParseError::RelativeUrlWithoutBase);
        }
        let scheme = scheme_str.to_ascii_lowercase();

        // The authority ends at the first '/'; everything from there on is the path.
        let path_start = after_scheme.find('/').unwrap_or(after_scheme.len());
        let (authority, raw_path) = after_scheme.split_at(path_start);
        let path = if raw_path.is_empty() {
            String::new()
        } else {
            percent_decode(raw_path)?
        };

        // The *last* '@' separates user information from the host, and the *first* ':'
        // inside the user information separates user name from password.
        let (username, password, host, port) =
            if let Some((user_info, host_port)) = authority.rsplit_once('@') {
                let (user, pass) = if let Some((user_str, pass_str)) = user_info.split_once(':') {
                    let pass = if pass_str.is_empty() {
                        None
                    } else {
                        Some(percent_decode(pass_str)?)
                    };
                    (percent_decode(user_str)?, pass)
                } else {
                    (percent_decode(user_info)?, None)
                };
                let (h, p) = Self::parse_host_port(host_port)?;
                // User information without a host (`user@`) is not accepted.
                if h.is_none() {
                    return Err(UrlParseError::InvalidDomainCharacter);
                }
                (user, pass, h, p)
            } else {
                let (h, p) = Self::parse_host_port(authority)?;
                (String::new(), None, h, p)
            };

        let requires_host = matches!(scheme.as_str(), "http" | "https" | "git" | "ssh" | "ftp" | "ftps");
        if requires_host && host.is_none() {
            return Err(UrlParseError::SchemeRequiresHost);
        }

        Ok(ParsedUrl {
            scheme,
            username,
            password,
            host,
            port,
            path,
        })
    }

    /// Splits the `host[:port]` part of an authority into host and port.
    ///
    /// * Empty input yields `(None, None)`.
    /// * A bracketed host (`[...]`) is lowercased and keeps its brackets; it may be
    ///   followed by `:port`. With a trailing `:` and no port, the whole input including
    ///   the colon is kept as the host.
    /// * Otherwise, `host:digits` (with exactly one colon) is split into host and port.
    ///   Anything else, including a trailing colon or a non-numeric suffix, is kept
    ///   whole as the host.
    ///
    /// # Errors
    ///
    /// [`UrlParseError::InvalidDomainCharacter`] for a malformed bracketed host or a host
    /// rejected by `normalize_hostname`; [`UrlParseError::InvalidPort`] for a port that is
    /// not a decimal number in `1..=65535`.
    fn parse_host_port(host_port: &str) -> Result<(Option<String>, Option<u16>), UrlParseError> {
        if host_port.is_empty() {
            return Ok((None, None));
        }

        if host_port.starts_with('[') {
            let Some(bracket_end) = host_port.find(']') else {
                return Err(UrlParseError::InvalidDomainCharacter);
            };
            let bracketed = &host_port[..=bracket_end];
            let remaining = &host_port[bracket_end + 1..];
            if remaining.is_empty() {
                return Ok((Some(bracketed.to_ascii_lowercase()), None));
            }
            // Only a ':' may follow the closing bracket.
            let Some(port_str) = remaining.strip_prefix(':') else {
                return Err(UrlParseError::InvalidDomainCharacter);
            };
            if port_str.is_empty() {
                // Trailing colon without a port: the colon stays part of the host.
                return Ok((Some(host_port.to_ascii_lowercase()), None));
            }
            let port = Self::parse_port(port_str)?;
            return Ok((Some(bracketed.to_ascii_lowercase()), Some(port)));
        }

        if let Some((before_last_colon, after_last_colon)) = host_port.rsplit_once(':') {
            let has_single_colon = !before_last_colon.contains(':');
            if has_single_colon {
                if after_last_colon.is_empty() {
                    return Ok((Some(Self::normalize_hostname(host_port)?), None));
                }
                if after_last_colon.bytes().all(|b| b.is_ascii_digit()) {
                    let host = Self::normalize_hostname(before_last_colon)?;
                    let port = Self::parse_port(after_last_colon)?;
                    return Ok((Some(host), Some(port)));
                }
            }
        }

        Ok((Some(Self::normalize_hostname(host_port)?), None))
    }

    /// Parses a port consisting solely of ASCII digits into a non-zero `u16`.
    ///
    /// Returns [`UrlParseError::InvalidPort`] for anything else.
    fn parse_port(port_str: &str) -> Result<u16, UrlParseError> {
        // `str::parse::<u16>` accepts a leading '+', which is not valid in a URL port,
        // so insist on digits only before handing the text to the integer parser.
        if !port_str.bytes().all(|b| b.is_ascii_digit()) {
            return Err(UrlParseError::InvalidPort);
        }
        match port_str.parse::<u16>() {
            Ok(port) if port != 0 => Ok(port),
            _ => Err(UrlParseError::InvalidPort),
        }
    }

    /// Returns `true` if `host` consists only of ASCII letters, ASCII digits and the
    /// characters `-`, `.`, `_`, `*`, i.e. if it is safe to lowercase.
    fn is_normalizable_hostname(host: &str) -> bool {
        host.chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | '_' | '*'))
    }

    /// Validates `host` and lowercases it when it is a plain ASCII hostname.
    ///
    /// Hosts containing `?` or whitespace are rejected with
    /// [`UrlParseError::InvalidDomainCharacter`]; hosts with other unusual characters
    /// are returned unchanged.
    fn normalize_hostname(host: &str) -> Result<String, UrlParseError> {
        if host.chars().any(|c| c == '?' || c.is_whitespace()) {
            return Err(UrlParseError::InvalidDomainCharacter);
        }
        if Self::is_normalizable_hostname(host) {
            Ok(host.to_ascii_lowercase())
        } else {
            Ok(host.to_owned())
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking if the parser rejects it.
    fn parsed(input: &str) -> ParsedUrl {
        ParsedUrl::parse(input).unwrap()
    }

    /// Asserts that the parser rejects every one of `inputs`.
    fn assert_rejected(inputs: &[&str]) {
        for input in inputs {
            assert!(ParsedUrl::parse(input).is_err());
        }
    }

    #[test]
    fn simple_url() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            port,
            path,
        } = parsed("http://example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
        assert_eq!(username, "");
        assert_eq!(password, None);
        assert_eq!(port, None);
    }

    #[test]
    fn url_with_port() {
        let ParsedUrl {
            scheme, host, port, path, ..
        } = parsed("http://example.com:8080/path");
        assert_eq!(scheme, "http");
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(port, Some(8080));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_user() {
        let ParsedUrl {
            scheme,
            username,
            host,
            path,
            ..
        } = parsed("http://user@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user");
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_user_and_password() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user:pass@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user");
        assert_eq!(password.as_deref(), Some("pass"));
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_ipv6() {
        let ParsedUrl {
            scheme, host, path, ..
        } = parsed("http://[::1]/path");
        assert_eq!(scheme, "http");
        assert_eq!(host.as_deref(), Some("[::1]"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_ipv6_and_port() {
        let ParsedUrl {
            scheme, host, port, path, ..
        } = parsed("http://[::1]:8080/path");
        assert_eq!(scheme, "http");
        assert_eq!(host.as_deref(), Some("[::1]"));
        assert_eq!(port, Some(8080));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_space_in_host_is_rejected() {
        assert_rejected(&[
            "http://has a space",
            "http://has a space/path",
            "https://example.com with space/path",
        ]);
    }

    #[test]
    fn url_with_tab_in_host_is_rejected() {
        assert_rejected(&["http://has\ta\ttab"]);
    }

    #[test]
    fn url_with_newline_in_host_is_rejected() {
        assert_rejected(&["http://has\na\nnewline"]);
    }

    #[test]
    fn url_with_percent_encoded_username() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user%20name@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user name");
        assert_eq!(password, None);
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_percent_encoded_password() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user:pass%20word@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user");
        assert_eq!(password.as_deref(), Some("pass word"));
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_percent_encoded_username_and_password() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user%20name:pass%20word@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user name");
        assert_eq!(password.as_deref(), Some("pass word"));
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_special_chars_in_username() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user%40name@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user@name");
        assert_eq!(password, None);
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_special_chars_in_password() {
        let ParsedUrl {
            scheme,
            username,
            password,
            host,
            path,
            ..
        } = parsed("http://user:p%40ss%3Aword@example.com/path");
        assert_eq!(scheme, "http");
        assert_eq!(username, "user");
        assert_eq!(password.as_deref(), Some("p@ss:word"));
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path");
    }

    #[test]
    fn url_with_percent_encoded_path() {
        let ParsedUrl {
            scheme, host, path, ..
        } = parsed("http://example.com/path/with%20spaces/file");
        assert_eq!(scheme, "http");
        assert_eq!(host.as_deref(), Some("example.com"));
        assert_eq!(path, "/path/with spaces/file");
    }
}