use crate::error::Error;
/// A parsed URL that wraps [`url::Url`] and keeps extra bookkeeping gathered
/// while parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Url {
/// The parsed URL that most accessors delegate to.
inner: url::Url,
/// Original host text, set when `parse` had to substitute a placeholder host
/// in `inner`; when present it takes precedence in `host_str`.
override_host: Option<String>,
/// The string that was handed to the underlying parser (after scheme
/// defaulting and zone-ID stripping); `None` after `clear_raw_input`.
raw_input: Option<String>,
}
impl Url {
/// Parses `input` into a [`Url`].
///
/// Inputs without a scheme are completed by `maybe_add_scheme` (defaulting to
/// `http://`). An IPv6 zone identifier (`%25…` inside the bracketed host) is
/// removed before the string is handed to the `url` crate. When the `url`
/// crate rejects an ASCII host as an invalid IPv4 address or because of an
/// invalid domain character, the URL is re-parsed with a placeholder host and
/// the original host text is remembered in `override_host`.
///
/// # Errors
///
/// Returns [`Error::UrlParse`] when `input` is empty, the authority contains
/// more than one `@`, the host (or an `xn--` label) is too long, a rejected
/// host is not ASCII, or the `url` crate cannot parse the string.
pub fn parse(input: &str) -> Result<Self, Error> {
    if input.is_empty() {
        return Err(Error::UrlParse("empty URL".to_string()));
    }
    let input = Self::maybe_add_scheme(input);

    // Byte span of the authority component. It ends at the first `/`, `?` or
    // `#` (or the end of the string), so `@` or `[` characters that appear in
    // the path, query or fragment are never mistaken for authority syntax.
    let authority_span = input.find("://").map(|scheme_end| {
        let start = scheme_end + 3;
        let end = input[start..]
            .find(['/', '?', '#'])
            .map_or(input.len(), |offset| start + offset);
        (start, end)
    });

    if let Some((start, end)) = authority_span {
        if input[start..end].matches('@').count() > 1 {
            // The message text is kept exactly as it was; callers may match on it.
            return Err(Error::UrlParse(
                "Port number was not a decimal number between 0 and 65535".to_string(),
            ));
        }
    }

    // Locate `%25<zone>` inside a bracketed host: the span runs from the `%`
    // up to (not including) the closing `]`.
    let zone_span = authority_span.and_then(|(start, end)| {
        let authority = &input[start..end];
        let open = authority.find('[')?;
        let close = open + authority[open..].find(']')?;
        let zone = open + authority[open..close].find("%25")?;
        Some((start + zone, start + close))
    });
    let input = match zone_span {
        Some((zone_start, bracket_end)) => {
            format!("{}{}", &input[..zone_start], &input[bracket_end..])
        }
        None => input,
    };

    match url::Url::parse(&input) {
        Ok(inner) => {
            if let Some(host) = inner.host_str() {
                if host.len() > 65535 {
                    return Err(Error::UrlParse("hostname too long".to_string()));
                }
                let oversized_label = host
                    .split('.')
                    .any(|label| label.starts_with("xn--") && label.len() > 63);
                if oversized_label {
                    return Err(Error::UrlParse("hostname label too long".to_string()));
                }
            }
            Ok(Self { inner, override_host: None, raw_input: Some(input) })
        }
        Err(e) => {
            let err_str = e.to_string();
            if err_str.contains("invalid IPv4 address")
                || err_str.contains("invalid domain character")
            {
                if let Some(after_scheme) = input.find("://") {
                    let host_start = after_scheme + 3;
                    let rest = &input[host_start..];
                    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
                    let authority = &rest[..authority_end];
                    // Skip past `user[:password]@` if present.
                    let host_part_start = authority.find('@').map_or(0, |at_pos| at_pos + 1);
                    let host_rest = &rest[host_part_start..];
                    let host_end =
                        host_rest.find([':', '/', '?', '#']).unwrap_or(host_rest.len());
                    let original_host = &host_rest[..host_end];
                    if !original_host.is_ascii() {
                        return Err(Error::UrlParse(format!(
                            "IDN hostname conversion failed: {original_host}"
                        )));
                    }
                    // Re-parse with a host the `url` crate accepts; the real
                    // host is carried separately in `override_host`.
                    let placeholder = "urlx-placeholder.invalid";
                    let modified = format!(
                        "{}{}{}",
                        &input[..host_start + host_part_start],
                        placeholder,
                        &host_rest[host_end..],
                    );
                    if let Ok(inner) = url::Url::parse(&modified) {
                        return Ok(Self {
                            inner,
                            override_host: Some(original_host.to_string()),
                            raw_input: Some(input),
                        });
                    }
                }
            }
            Err(Error::UrlParse(e.to_string()))
        }
    }
}
/// Returns the URL scheme as reported by the inner parsed URL.
#[must_use]
pub fn scheme(&self) -> &str {
self.inner.scheme()
}
/// Returns the host of this URL.
///
/// The original host text recorded in `override_host` wins when it is set;
/// otherwise the host of the inner parsed URL is returned.
#[must_use]
pub fn host_str(&self) -> Option<&str> {
    match self.override_host.as_deref() {
        Some(original) => Some(original),
        None => self.inner.host_str(),
    }
}
/// Returns the port reported by the inner parsed URL, without applying any
/// scheme default (see `port_or_default` for that).
#[must_use]
pub fn port(&self) -> Option<u16> {
self.inner.port()
}
/// Returns the port to connect to.
///
/// The inner URL's explicit or known-default port is preferred. For schemes
/// the inner URL has no default for, a small built-in table is consulted;
/// `None` is returned when the scheme is not in that table either.
#[must_use]
pub fn port_or_default(&self) -> Option<u16> {
    if let Some(port) = self.inner.port_or_known_default() {
        return Some(port);
    }
    let fallback = match self.inner.scheme() {
        "ftps" => 990,
        "sftp" | "scp" | "ssh" => 22,
        "gopher" => 70,
        "gophers" => 443,
        "rtsp" => 554,
        "rtsps" => 322,
        _ => return None,
    };
    Some(fallback)
}
/// Returns the path component as reported by the inner parsed URL.
#[must_use]
pub fn path(&self) -> &str {
self.inner.path()
}
/// Returns the query string of the inner parsed URL, if there is one.
#[must_use]
pub fn query(&self) -> Option<&str> {
self.inner.query()
}
/// Returns the fragment of the inner parsed URL, if there is one.
#[must_use]
pub fn fragment(&self) -> Option<&str> {
self.inner.fragment()
}
/// Returns the username of the inner parsed URL. `credentials` treats an
/// empty string here as "no username".
#[must_use]
pub fn username(&self) -> &str {
self.inner.username()
}
/// Returns the password of the inner parsed URL, if there is one.
#[must_use]
pub fn password(&self) -> Option<&str> {
self.inner.password()
}
/// Returns `(username, password)` when the URL carries a username.
///
/// A missing password is reported as an empty string; a URL with an empty
/// username yields `None`.
#[must_use]
pub fn credentials(&self) -> Option<(&str, &str)> {
    match self.inner.username() {
        "" => None,
        user => Some((user, self.inner.password().unwrap_or(""))),
    }
}
/// Returns the serialization of the inner parsed URL.
///
/// When `parse` substituted a placeholder host, this string contains the
/// placeholder; `to_full_string` restores the original host.
#[must_use]
pub fn as_str(&self) -> &str {
self.inner.as_str()
}
/// Returns the serialized URL with the original host restored.
///
/// Without an `override_host` this is the inner URL's serialization. With
/// one, the placeholder host that `parse` substituted is replaced by the
/// original host text. Only the host position inside the authority is
/// rewritten, so a path or query that happens to contain the placeholder
/// text is left untouched.
#[must_use]
pub fn to_full_string(&self) -> String {
    const PLACEHOLDER: &str = "urlx-placeholder.invalid";

    let serialized = self.inner.as_str();
    let Some(real_host) = self.override_host.as_deref() else {
        return serialized.to_string();
    };

    // The authority runs from just after "://" to the first `/`, `?` or `#`.
    let authority_start = serialized.find("://").map_or(0, |idx| idx + 3);
    let authority_end = serialized[authority_start..]
        .find(['/', '?', '#'])
        .map_or(serialized.len(), |offset| authority_start + offset);
    // The host follows the last `@` of the authority (userinfo, if any).
    let host_start = serialized[authority_start..authority_end]
        .rfind('@')
        .map_or(authority_start, |at| authority_start + at + 1);

    match serialized[host_start..authority_end].strip_prefix(PLACEHOLDER) {
        Some(after_host) => {
            let prefix = &serialized[..host_start];
            let suffix = &serialized[authority_end..];
            format!("{prefix}{real_host}{after_host}{suffix}")
        }
        // Host not where expected: fall back to the plain textual replacement.
        None => serialized.replace(PLACEHOLDER, real_host),
    }
}
pub fn host_and_port(&self) -> Result<(String, u16), Error> {
let host = self
.host_str()
.ok_or_else(|| Error::UrlParse("URL has no host".to_string()))?
.to_string();
let port = self.port_or_default().ok_or_else(|| {
Error::UrlParse("URL has no port and no default for scheme".to_string())
})?;
Ok((host, port))
}
/// Builds the `host[:port]` string for this URL.
///
/// The host spelling from the raw input is preferred when it matches the
/// parsed host (ignoring ASCII case); otherwise the parsed host is used, or
/// an empty string if there is none. The port is appended only when the
/// inner URL reports one.
#[must_use]
pub fn host_header_value(&self) -> String {
    let host = match self.raw_host_str() {
        Some(raw) => raw,
        None => self.host_str().unwrap_or("").to_string(),
    };
    if let Some(port) = self.inner.port() {
        format!("{host}:{port}")
    } else {
        host
    }
}
/// Extracts the host exactly as it was spelled in the raw input.
///
/// Returns `None` when there is no raw input, it has no `://`, the URL has no
/// host, or the raw spelling differs from the parsed host by more than ASCII
/// case.
///
/// The authority is delimited by the first `/`, `?` or `#`, so an `@` that
/// appears in a query or fragment is not mistaken for the userinfo separator.
fn raw_host_str(&self) -> Option<String> {
    let raw = self.raw_input.as_deref()?;
    let (_, rest) = raw.split_once("://")?;
    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let authority = &rest[..authority_end];
    // Skip `user[:password]@` if present.
    let host_start = authority.rfind('@').map_or(0, |at| at + 1);
    let host_and_port = &authority[host_start..];
    // Everything up to the first `:` is taken as the host.
    let host_end = host_and_port.find(':').unwrap_or(host_and_port.len());
    let raw_host = &host_and_port[..host_end];
    let parsed_host = self.host_str()?;
    raw_host.eq_ignore_ascii_case(parsed_host).then(|| raw_host.to_string())
}
/// Returns the stored raw input string, or `None` if it has been cleared.
#[must_use]
pub fn raw_input(&self) -> Option<&str> {
self.raw_input.as_deref()
}
/// Discards the stored raw input. Afterwards `request_target` and
/// `host_header_value` fall back to the parsed URL.
pub fn clear_raw_input(&mut self) {
self.raw_input = None;
}
/// Builds the path-and-query string for this URL.
///
/// With raw input available, the path and query are cut out of it verbatim
/// and only dot segments are normalized. Otherwise the parsed path is used
/// with `%22`, `%7B`, `%7D` and `%5C` turned back into their literal
/// characters, and `%20` in the query rewritten to `+`.
#[must_use]
pub fn request_target(&self) -> String {
    if let Some(raw) = self.raw_input.as_deref() {
        return normalize_dot_segments(&extract_raw_path_and_query(raw));
    }

    let mut target = self.inner.path().to_string();
    for (encoded, literal) in [("%22", "\""), ("%7B", "{"), ("%7D", "}"), ("%5C", "\\")] {
        target = target.replace(encoded, literal);
    }
    if let Some(query) = self.inner.query() {
        target.push('?');
        target.push_str(&query.replace("%20", "+"));
    }
    target
}
pub fn set_port(&mut self, port: Option<u16>) -> Result<(), Error> {
self.inner
.set_port(port)
.map_err(|()| Error::UrlParse("cannot set port on this URL".to_string()))
}
/// Changes the scheme of this URL.
///
/// The inner URL's own `set_scheme` is tried first. If it refuses the change,
/// the URL is rebuilt by swapping the scheme prefix of the serialization and
/// re-parsing the result.
///
/// # Errors
///
/// Returns [`Error::UrlParse`] when the rebuilt string does not parse.
pub fn set_scheme(&mut self, scheme: &str) -> Result<(), Error> {
    if self.inner.set_scheme(scheme).is_ok() {
        return Ok(());
    }
    let old_str = self.inner.as_str();
    // The serialization starts with the current scheme. Slicing it off keeps
    // the ":" and everything after it, which also works for URLs that have no
    // "://" (e.g. `dict:foo`).
    let rest = &old_str[self.inner.scheme().len()..];
    let new_str = format!("{scheme}{rest}");
    self.inner = url::Url::parse(&new_str)
        .map_err(|e| Error::UrlParse(format!("cannot set scheme to '{scheme}': {e}")))?;
    Ok(())
}
/// Schemes that `maybe_add_scheme` recognizes (ASCII case-insensitively) in
/// front of a `:` when the input contains no `://`.
const KNOWN_SCHEMES: &'static [&'static str] = &[
"file", "ftp", "ftps", "http", "https", "sftp", "scp", "dict", "tftp", "mqtt", "ws", "wss",
"smtp", "smtps", "imap", "imaps", "pop3", "pop3s", "rtsp", "telnet", "ldap", "ldaps",
"gopher", "gophers",
];
/// Returns `input` with a scheme, adding or repairing one where needed.
///
/// * Input that already starts with `scheme://` is returned unchanged.
/// * `known-scheme:/path` (single slash) is repaired to `known-scheme://path`,
///   or to `file:///path` for `file`.
/// * Any other `known-scheme:…` input is returned unchanged.
/// * Everything else is prefixed with `http://`.
///
/// A `://` only counts as a scheme separator when the text in front of it is
/// a syntactically valid scheme. This lets scheme-less input such as
/// `example.com/r?to=https://other` still receive the default `http://`.
fn maybe_add_scheme(input: &str) -> String {
    if let Some((prefix, _)) = input.split_once("://") {
        // Leading spaces/control characters and embedded tab/newline characters
        // are ignored for this check, so such inputs are still passed through
        // untouched, as they were before.
        let mut scheme_chars = prefix
            .trim_start_matches(|c: char| c <= ' ')
            .chars()
            .filter(|c| !matches!(c, '\t' | '\n' | '\r'));
        // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
        let is_scheme = scheme_chars.next().is_some_and(|c| c.is_ascii_alphabetic())
            && scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
        if is_scheme {
            return input.to_string();
        }
    }

    if let Some((before_colon, after_colon)) = input.split_once(':') {
        if Self::KNOWN_SCHEMES.iter().any(|s| s.eq_ignore_ascii_case(before_colon)) {
            if after_colon.starts_with('/') && !after_colon.starts_with("//") {
                if before_colon.eq_ignore_ascii_case("file") {
                    // `file:/path` has an empty host: `file:///path`.
                    return format!("{before_colon}://{after_colon}");
                }
                return format!("{before_colon}:/{after_colon}");
            }
            return input.to_string();
        }
    }

    format!("http://{input}")
}
}
/// Writes the serialization of the inner parsed URL.
///
/// When `parse` substituted a placeholder host, the placeholder is what gets
/// printed; use `to_full_string` to obtain the original host.
impl std::fmt::Display for Url {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.inner.as_str())
    }
}
/// Cuts the path and query out of a raw URL string, without the fragment.
///
/// * Input without `://` is returned unchanged.
/// * A URL with neither path nor query yields `/`.
/// * A URL with a query but no path yields `/?query`.
///
/// The fragment is removed before the path or query is located, so a `/` or
/// `?` inside the fragment is never mistaken for the start of the path or
/// query.
fn extract_raw_path_and_query(url_str: &str) -> String {
    let Some((_, after_scheme)) = url_str.split_once("://") else {
        return url_str.to_string();
    };
    // Everything from the first `#` on is the fragment.
    let without_fragment = after_scheme.split('#').next().unwrap_or(after_scheme);
    match without_fragment.find(['/', '?']) {
        None => "/".to_string(),
        Some(start) => {
            let target = &without_fragment[start..];
            if target.starts_with('?') {
                // Query directly after the authority: supply the root path.
                format!("/{target}")
            } else {
                target.to_string()
            }
        }
    }
}
/// Removes `.` and `..` segments from the path part of `path_and_query`.
///
/// Everything from the first `?` on is treated as the query and copied
/// through untouched. A `..` removes the preceding kept segment (and is
/// dropped if there is none). A trailing `.` or `..` leaves a trailing `/`,
/// as in RFC 3986 section 5.2.4: `/a/b/..` becomes `/a/`, not `/a`. The
/// leading `/` of an absolute path is always preserved, and empty segments
/// (`//`) are kept.
fn normalize_dot_segments(path_and_query: &str) -> String {
    let (path, query) = match path_and_query.find('?') {
        Some(q_pos) => path_and_query.split_at(q_pos),
        None => (path_and_query, ""),
    };
    // Handle the root slash separately so `..` can never consume it.
    let (root, relative) = match path.strip_prefix('/') {
        Some(rest) => ("/", rest),
        None => ("", path),
    };

    let mut kept: Vec<&str> = Vec::new();
    let mut segments = relative.split('/').peekable();
    while let Some(segment) = segments.next() {
        match segment {
            "." => {}
            ".." => {
                let _ = kept.pop();
            }
            _ => {
                kept.push(segment);
                continue;
            }
        }
        // A dot segment at the very end refers to a directory: keep its slash.
        if segments.peek().is_none() {
            kept.push("");
        }
    }

    format!("{root}{}{query}", kept.join("/"))
}
// Unit tests for `Url`: parsing, accessors, default ports, request-target
// construction, and the port/scheme setters.
#[cfg(test)]
// `unwrap` is acceptable here: a failed unwrap simply fails the test.
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// --- Basic parsing and accessors ---
#[test]
fn parse_simple_https() {
let url = Url::parse("https://example.com/path?q=1").unwrap();
assert_eq!(url.scheme(), "https");
assert_eq!(url.host_str(), Some("example.com"));
assert_eq!(url.path(), "/path");
assert_eq!(url.query(), Some("q=1"));
assert_eq!(url.fragment(), None);
}
#[test]
fn parse_simple_http() {
let url = Url::parse("http://example.com").unwrap();
assert_eq!(url.scheme(), "http");
assert_eq!(url.host_str(), Some("example.com"));
assert_eq!(url.port_or_default(), Some(80));
}
#[test]
fn parse_with_port() {
let url = Url::parse("http://example.com:8080/path").unwrap();
assert_eq!(url.port(), Some(8080));
assert_eq!(url.port_or_default(), Some(8080));
}
#[test]
fn parse_https_default_port() {
let url = Url::parse("https://example.com/").unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_default(), Some(443));
}
#[test]
fn parse_with_fragment() {
let url = Url::parse("https://example.com/page#section").unwrap();
assert_eq!(url.fragment(), Some("section"));
}
#[test]
fn parse_with_userinfo() {
let url = Url::parse("http://user:pass@example.com/").unwrap();
assert_eq!(url.username(), "user");
assert_eq!(url.password(), Some("pass"));
}
#[test]
fn parse_empty_returns_error() {
assert!(Url::parse("").is_err());
}
// Scheme-less input is completed with `http://`.
#[test]
fn parse_no_scheme_defaults_to_http() {
let url = Url::parse("example.com/path").unwrap();
assert_eq!(url.scheme(), "http");
assert_eq!(url.host_str(), Some("example.com"));
assert_eq!(url.path(), "/path");
}
// --- host_and_port ---
#[test]
fn host_and_port_http() {
let url = Url::parse("http://example.com/path").unwrap();
let (host, port) = url.host_and_port().unwrap();
assert_eq!(host, "example.com");
assert_eq!(port, 80);
}
#[test]
fn host_and_port_https() {
let url = Url::parse("https://secure.example.com/").unwrap();
let (host, port) = url.host_and_port().unwrap();
assert_eq!(host, "secure.example.com");
assert_eq!(port, 443);
}
#[test]
fn host_and_port_custom() {
let url = Url::parse("http://localhost:3000/api").unwrap();
let (host, port) = url.host_and_port().unwrap();
assert_eq!(host, "localhost");
assert_eq!(port, 3000);
}
// --- request_target ---
#[test]
fn request_target_with_query() {
let url = Url::parse("http://example.com/api?key=value").unwrap();
assert_eq!(url.request_target(), "/api?key=value");
}
#[test]
fn request_target_without_query() {
let url = Url::parse("http://example.com/api").unwrap();
assert_eq!(url.request_target(), "/api");
}
#[test]
fn request_target_root() {
let url = Url::parse("http://example.com").unwrap();
assert_eq!(url.request_target(), "/");
}
#[test]
fn display_roundtrip() {
let input = "https://example.com/path?q=1#frag";
let url = Url::parse(input).unwrap();
assert_eq!(url.to_string(), input);
}
// --- IP literal hosts ---
#[test]
fn parse_ipv4_host() {
let url = Url::parse("http://127.0.0.1:8080/").unwrap();
assert_eq!(url.host_str(), Some("127.0.0.1"));
assert_eq!(url.port(), Some(8080));
}
// The IPv6 host is reported with its surrounding brackets.
#[test]
fn parse_ipv6_host() {
let url = Url::parse("http://[::1]:8080/").unwrap();
assert_eq!(url.host_str(), Some("[::1]"));
assert_eq!(url.port(), Some(8080));
}
// --- Percent-encoding is left as written ---
#[test]
fn parse_percent_encoded_path() {
let url = Url::parse("http://example.com/hello%20world").unwrap();
assert_eq!(url.path(), "/hello%20world");
}
#[test]
fn parse_percent_encoded_query() {
let url = Url::parse("http://example.com/path?q=hello%20world").unwrap();
assert_eq!(url.query(), Some("q=hello%20world"));
}
// --- credentials ---
#[test]
fn parse_credentials_in_url() {
let url = Url::parse("http://admin:secret@example.com/").unwrap();
let (user, pass) = url.credentials().unwrap();
assert_eq!(user, "admin");
assert_eq!(pass, "secret");
}
// A username without a password yields an empty password string.
#[test]
fn parse_credentials_username_only() {
let url = Url::parse("http://admin@example.com/").unwrap();
let (user, pass) = url.credentials().unwrap();
assert_eq!(user, "admin");
assert_eq!(pass, "");
}
#[test]
fn parse_no_credentials() {
let url = Url::parse("http://example.com/").unwrap();
assert!(url.credentials().is_none());
}
// --- Other schemes and their default ports ---
#[test]
fn parse_ftp_scheme() {
let url = Url::parse("ftp://files.example.com/pub/readme.txt").unwrap();
assert_eq!(url.scheme(), "ftp");
assert_eq!(url.host_str(), Some("files.example.com"));
assert_eq!(url.path(), "/pub/readme.txt");
assert_eq!(url.port_or_default(), Some(21));
}
#[test]
fn parse_ftps_scheme() {
let url = Url::parse("ftps://secure.example.com/data/file.csv").unwrap();
assert_eq!(url.scheme(), "ftps");
assert_eq!(url.host_str(), Some("secure.example.com"));
assert_eq!(url.path(), "/data/file.csv");
assert_eq!(url.port_or_default(), Some(990));
}
#[test]
fn parse_ftps_custom_port() {
let url = Url::parse("ftps://secure.example.com:2121/file.txt").unwrap();
assert_eq!(url.scheme(), "ftps");
assert_eq!(url.port(), Some(2121));
assert_eq!(url.port_or_default(), Some(2121));
}
#[test]
fn parse_ftps_with_credentials() {
let url = Url::parse("ftps://user:pass@ftp.example.com/pub/").unwrap();
let (user, pass) = url.credentials().unwrap();
assert_eq!(user, "user");
assert_eq!(pass, "pass");
}
#[test]
fn parse_file_url() {
let url = Url::parse("file:///tmp/test.txt").unwrap();
assert_eq!(url.scheme(), "file");
assert_eq!(url.path(), "/tmp/test.txt");
}
// --- Path and query edge cases ---
#[test]
fn parse_url_with_special_query_chars() {
let url = Url::parse("http://example.com/search?q=a&b=c&d=e").unwrap();
assert_eq!(url.query(), Some("q=a&b=c&d=e"));
}
// `path()` reports the path with `..` already resolved.
#[test]
fn parse_path_with_dots() {
let url = Url::parse("http://example.com/a/b/../c").unwrap();
assert_eq!(url.path(), "/a/c");
}
#[test]
fn parse_trailing_slash() {
let url = Url::parse("http://example.com/path/").unwrap();
assert_eq!(url.path(), "/path/");
}
#[test]
fn parse_double_slash_in_path() {
let url = Url::parse("http://example.com//path").unwrap();
assert_eq!(url.path(), "//path");
}
// --- host_header_value ---
#[test]
fn host_header_default_port_omitted() {
let url = Url::parse("http://example.com/").unwrap();
assert_eq!(url.host_header_value(), "example.com");
}
#[test]
fn host_header_custom_port_included() {
let url = Url::parse("http://example.com:8080/").unwrap();
assert_eq!(url.host_header_value(), "example.com:8080");
}
// 500 repetitions of "/a" give a 1000-byte path.
#[test]
fn parse_long_path() {
let long_path = "/a".repeat(500);
let url_str = format!("http://example.com{long_path}");
let url = Url::parse(&url_str).unwrap();
assert_eq!(url.path().len(), 1000);
}
#[test]
fn parse_empty_path() {
let url = Url::parse("http://example.com").unwrap();
assert_eq!(url.path(), "/");
}
#[test]
fn parse_url_with_port_zero() {
let url = Url::parse("http://example.com:0/").unwrap();
assert_eq!(url.port(), Some(0));
}
// --- SSH-based schemes ---
#[test]
fn parse_sftp_scheme() {
let url = Url::parse("sftp://user@host.example.com/path/file.txt").unwrap();
assert_eq!(url.scheme(), "sftp");
assert_eq!(url.host_str(), Some("host.example.com"));
assert_eq!(url.path(), "/path/file.txt");
assert_eq!(url.port_or_default(), Some(22));
assert_eq!(url.username(), "user");
}
#[test]
fn parse_scp_scheme() {
let url = Url::parse("scp://user:pass@host.example.com/remote/file").unwrap();
assert_eq!(url.scheme(), "scp");
assert_eq!(url.host_str(), Some("host.example.com"));
assert_eq!(url.path(), "/remote/file");
assert_eq!(url.port_or_default(), Some(22));
let (user, pass) = url.credentials().unwrap();
assert_eq!(user, "user");
assert_eq!(pass, "pass");
}
#[test]
fn parse_sftp_custom_port() {
let url = Url::parse("sftp://user@host.example.com:2222/file.txt").unwrap();
assert_eq!(url.port(), Some(2222));
assert_eq!(url.port_or_default(), Some(2222));
}
// --- Setters ---
#[test]
fn set_port_explicit() {
let mut url = Url::parse("http://example.com:3128/path").unwrap();
url.set_port(Some(9999)).unwrap();
assert_eq!(url.port(), Some(9999));
assert_eq!(url.as_str(), "http://example.com:9999/path");
}
#[test]
fn set_port_remove() {
let mut url = Url::parse("http://example.com:3128/path").unwrap();
url.set_port(None).unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_default(), Some(80));
}
// Switching scheme must keep host and port intact.
#[test]
fn set_scheme_http_to_socks5() {
let mut url = Url::parse("http://proxy.example.com:8080/").unwrap();
url.set_scheme("socks5").unwrap();
assert_eq!(url.scheme(), "socks5");
assert_eq!(url.host_str(), Some("proxy.example.com"));
assert_eq!(url.port(), Some(8080));
}
#[test]
fn set_scheme_invalid() {
let mut url = Url::parse("http://example.com/").unwrap();
assert!(url.set_scheme("").is_err());
}
}