use crate::{Error, abi};
/// Errors reported when a URL cannot be parsed or converted.
///
/// The user-facing text of every variant is defined by the `Display` impl for
/// this type. Only `EmptyHost`, `RelativeUrlWithoutBase`, `InvalidUrl` and
/// `UnsupportedScheme` are constructed by the code visible in this file.
/// NOTE(review): the remaining variants are presumably kept for API parity
/// with the `url` crate's `ParseError` — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ParseError {
/// The host is missing or empty (returned by `Url::from_http_uri`).
EmptyHost,
/// Displayed as "invalid international domain name".
IdnaError,
/// Displayed as "invalid port number".
InvalidPort,
/// Displayed as "invalid IPv4 address".
InvalidIpv4Address,
/// Displayed as "invalid IPv6 address".
InvalidIpv6Address,
/// Displayed as "invalid domain character".
InvalidDomainCharacter,
/// The input has no scheme (returned by `Url::from_http_uri`).
RelativeUrlWithoutBase,
/// Displayed as "relative URL with a cannot-be-a-base base".
RelativeUrlWithCannotBeABaseBase,
/// Displayed as "a cannot-be-a-base URL doesn't have a host to set".
SetHostOnCannotBeABaseUrl,
/// Displayed as "URLs more than 4 GB are not supported".
Overflow,
/// Catch-all used by `Url::parse_impl` when the platform URL cracker rejects the input.
InvalidUrl,
/// The scheme is neither `http` nor `https` (compared ASCII case-insensitively).
UnsupportedScheme,
}
impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ParseError::EmptyHost => f.write_str("empty host"),
ParseError::IdnaError => f.write_str("invalid international domain name"),
ParseError::InvalidPort => f.write_str("invalid port number"),
ParseError::InvalidIpv4Address => f.write_str("invalid IPv4 address"),
ParseError::InvalidIpv6Address => f.write_str("invalid IPv6 address"),
ParseError::InvalidDomainCharacter => f.write_str("invalid domain character"),
ParseError::RelativeUrlWithoutBase => f.write_str("relative URL without a base"),
ParseError::RelativeUrlWithCannotBeABaseBase => {
f.write_str("relative URL with a cannot-be-a-base base")
}
ParseError::SetHostOnCannotBeABaseUrl => {
f.write_str("a cannot-be-a-base URL doesn't have a host to set")
}
ParseError::Overflow => f.write_str("URLs more than 4 GB are not supported"),
ParseError::InvalidUrl => f.write_str("invalid URL"),
ParseError::UnsupportedScheme => f.write_str("unsupported URL scheme"),
}
}
}
// Marks `ParseError` as a standard error type; all trait methods keep their defaults.
impl std::error::Error for ParseError {}
/// A parsed `http`/`https` URL with its components stored individually.
///
/// Equality and hashing are derived over all fields, including the
/// credentials, which are never part of `serialized`.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Url {
// Canonical text returned by `as_str()`: `scheme://host[:port]path_and_query[#fragment]`.
// Userinfo is deliberately not included.
pub(crate) serialized: String,
// ASCII-lowercased scheme (`http` or `https`).
pub(crate) scheme: String,
// Host text as reported by the URL cracker / `http::uri::Authority::host`.
pub(crate) host: String,
// Port number; `from_http_uri` fills in the scheme default (80/443) when none is given.
// NOTE(review): `parse_impl` takes this from the cracker — presumably also defaulted; confirm.
pub(crate) port: u16,
// True when `port` differs from the scheme default (not "was written in the input").
pub(crate) explicit_port: bool,
// Path component; `/` when the input had an empty path.
pub(crate) path: String,
// Query without the leading `?`.
pub(crate) query: Option<String>,
// Fragment without the leading `#`; `None` when absent or empty.
pub(crate) fragment: Option<String>,
// True when the scheme is `https`.
pub(crate) is_https: bool,
// Path followed by the query part (including its `?`), as passed to `to_http_uri`.
pub(crate) path_and_query: String,
// Percent-decoded user name; empty when the URL carried none.
pub(crate) username: String,
// Percent-decoded password; `None` when the userinfo had no `:` separator.
pub(crate) password: Option<String>,
}
impl Url {
    /// Returns the serialized URL (without userinfo) as a string slice.
    pub fn as_str(&self) -> &str {
        &self.serialized
    }

    /// Returns the lowercased scheme (`http` or `https`).
    pub fn scheme(&self) -> &str {
        &self.scheme
    }

    /// Returns the host; always `Some` because this type always stores a host string.
    pub fn host_str(&self) -> Option<&str> {
        Some(&self.host)
    }

    /// Returns the port only when it differs from the scheme's default port.
    pub fn port(&self) -> Option<u16> {
        self.explicit_port.then_some(self.port)
    }

    /// Returns the stored port, whether or not it is the scheme default.
    pub fn port_or_known_default(&self) -> Option<u16> {
        Some(self.port)
    }

    /// Returns the path component.
    pub fn path(&self) -> &str {
        &self.path
    }

    /// Returns the query string without the leading `?`, if any.
    pub fn query(&self) -> Option<&str> {
        self.query.as_deref()
    }

    /// Returns the fragment without the leading `#`, if any.
    pub fn fragment(&self) -> Option<&str> {
        self.fragment.as_deref()
    }

    /// Returns the serialized URL truncated at the first `#`.
    fn serialized_without_fragment(&self) -> String {
        match self.serialized.split_once('#') {
            Some((before, _)) => before.to_owned(),
            None => self.serialized.clone(),
        }
    }

    /// Parses an absolute `http`/`https` URL.
    ///
    /// # Errors
    /// Returns the [`ParseError`] produced by the internal parser.
    pub fn parse(url: &str) -> Result<Self, ParseError> {
        Url::parse_impl(url)
    }

    /// Resolves `input` as a reference relative to this URL.
    ///
    /// * An empty `input` yields this URL again (fragment included).
    /// * An input starting with `<scheme>://` is parsed on its own; the scheme
    ///   is recognised case-insensitively, so `HTTP://…` is absolute and
    ///   `ftp://…` is rejected by the parser rather than being treated as a
    ///   relative path.
    /// * `//host/…` inherits only the base scheme.
    /// * Everything else keeps the base scheme, host and port, and also the
    ///   base credentials, which are not part of the serialized form and would
    ///   otherwise be lost on re-parsing.
    ///
    /// # Errors
    /// Returns the [`ParseError`] produced when the resolved string is parsed.
    pub fn join(&self, input: &str) -> Result<Self, ParseError> {
        // True when `input` begins with a syntactically valid scheme
        // (ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )) followed by "://".
        fn has_scheme_prefix(input: &str) -> bool {
            let Some((scheme, _)) = input.split_once("://") else {
                return false;
            };
            let mut chars = scheme.chars();
            chars.next().is_some_and(|c| c.is_ascii_alphabetic())
                && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        }

        if has_scheme_prefix(input) {
            return Url::parse_impl(input);
        }
        if input.starts_with("//") {
            return Url::parse_impl(&format!("{}:{input}", self.scheme));
        }

        let (input_path, input_query, input_fragment) = split_reference(input);
        let mut resolved = if input.is_empty() {
            // Empty reference: the base is returned unchanged, fragment included.
            self.serialized.clone()
        } else if input_path.is_empty() && input_query.is_none() {
            // Fragment-only reference: keep the base path and query.
            self.serialized_without_fragment()
        } else {
            let mut target = format!("{}://{}", self.scheme, self.host);
            if self.explicit_port {
                target.push(':');
                target.push_str(&self.port.to_string());
            }
            if input_path.is_empty() {
                // Query-only reference: the base path is kept verbatim.
                target.push_str(&self.path);
            } else {
                let merged_path = if input_path.starts_with('/') {
                    input_path.to_owned()
                } else {
                    // Replace the last segment of the base path with the reference.
                    let base_dir = self.path.rsplit_once('/').map_or("", |(dir, _)| dir);
                    format!("{base_dir}/{input_path}")
                };
                target.push_str(&remove_dot_segments(&merged_path));
            }
            if let Some(query) = input_query {
                target.push('?');
                target.push_str(query);
            }
            target
        };
        if let Some(fragment) = input_fragment {
            resolved.push('#');
            resolved.push_str(fragment);
        }

        let mut joined = Url::parse_impl(&resolved)?;
        // `resolved` never contains userinfo, so carry the base credentials over.
        joined.username.clone_from(&self.username);
        joined.password.clone_from(&self.password);
        Ok(joined)
    }

    /// Returns the host when it is a domain name, or `None` for IP literals.
    pub fn domain(&self) -> Option<&str> {
        // A leading `[` marks a bracketed IPv6 literal, which `IpAddr` does not parse.
        let is_ip_literal =
            self.host.parse::<std::net::IpAddr>().is_ok() || self.host.starts_with('[');
        if is_ip_literal {
            None
        } else {
            Some(&self.host)
        }
    }

    /// Always `true` for this type.
    pub fn has_host(&self) -> bool {
        true
    }

    /// Always `true` for this type.
    pub fn has_authority(&self) -> bool {
        true
    }

    /// Always `false` for this type.
    pub fn cannot_be_a_base(&self) -> bool {
        false
    }

    /// Returns the `/`-separated path segments, ignoring one leading `/`.
    ///
    /// Always `Some`; the path `/` yields a single empty segment.
    pub fn path_segments(&self) -> Option<std::str::Split<'_, char>> {
        let path = self.path.strip_prefix('/').unwrap_or(&self.path);
        Some(path.split('/'))
    }

    /// Returns the percent-decoded user name, or `""` when there is none.
    pub fn username(&self) -> &str {
        &self.username
    }

    /// Returns the percent-decoded password, if any.
    pub fn password(&self) -> Option<&str> {
        self.password.as_deref()
    }
}
impl std::fmt::Display for Url {
    /// Writes the serialized URL exactly as returned by `as_str()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
impl std::fmt::Debug for Url {
    /// Formats the URL as a `Url { .. }` struct listing its components.
    ///
    /// The host is always rendered as `Some(Domain("<host>"))` and
    /// `cannot_be_a_base` is always `false`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Url");
        out.field("scheme", &self.scheme);
        out.field("cannot_be_a_base", &false);
        out.field("username", &self.username);
        out.field("password", &self.password);
        // `fmt::Arguments` implements `Debug` by writing its formatted text verbatim.
        out.field("host", &format_args!("Some(Domain({:?}))", self.host));
        out.field("port", &self.port());
        out.field("path", &self.path);
        out.field("query", &self.query);
        out.field("fragment", &self.fragment);
        out.finish()
    }
}
impl AsRef<str> for Url {
    /// Borrows the serialized URL.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
impl From<Url> for String {
fn from(url: Url) -> Self {
url.serialized
}
}
impl std::str::FromStr for Url {
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Url::parse_impl(s)
}
}
impl PartialOrd for Url {
    /// Delegates to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for Url {
    /// Orders URLs by their serialized text, then by the fields that the
    /// serialized text does not determine.
    ///
    /// `Eq` is derived over every field, but `serialized` omits the
    /// credentials and cannot distinguish a missing query from an empty one
    /// (both serialize with a bare `?`). Those fields are used as
    /// tie-breakers so that `cmp` returns `Equal` only for URLs that are also
    /// `==`, as the `Ord` contract requires.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.serialized
            .cmp(&other.serialized)
            .then_with(|| self.username.cmp(&other.username))
            .then_with(|| self.password.cmp(&other.password))
            .then_with(|| self.query.cmp(&other.query))
    }
}
/// Conversion of a value into a [`Url`]; the implementation half of [`IntoUrl`].
///
/// NOTE(review): the name suggests the sealed-trait pattern, but the trait is
/// declared `pub` here — whether downstream crates can name it depends on how
/// this module is exported; confirm.
pub trait IntoUrlSealed {
/// Consumes `self` and returns the resulting `Url`, or an `Error` when the
/// value cannot be converted.
fn into_url(self) -> Result<Url, Error>;
}
/// Marker trait for types accepted wherever a URL is expected; it adds no
/// methods of its own and requires [`IntoUrlSealed`].
pub trait IntoUrl: IntoUrlSealed {}
impl IntoUrlSealed for &str {
fn into_url(self) -> Result<Url, Error> {
Url::parse_impl(self).map_err(Error::builder)
}
}
impl IntoUrl for &str {}
impl IntoUrlSealed for String {
fn into_url(self) -> Result<Url, Error> {
Url::parse_impl(&self).map_err(Error::builder)
}
}
impl IntoUrl for String {}
impl IntoUrlSealed for &String {
fn into_url(self) -> Result<Url, Error> {
Url::parse_impl(self).map_err(Error::builder)
}
}
impl IntoUrl for &String {}
impl IntoUrlSealed for Url {
fn into_url(self) -> Result<Url, Error> {
Ok(self)
}
}
impl IntoUrl for Url {}
impl Url {
    /// Renders `scheme://host[:port]path_and_query[#fragment]`.
    ///
    /// `port` is written only when `Some`; `fragment` is written (with its
    /// `#`) only when `Some`.
    fn compose_serialized(
        scheme: &str,
        host: &str,
        port: Option<u16>,
        path_and_query: &str,
        fragment: Option<&str>,
    ) -> String {
        let mut out = match port {
            Some(port) => format!("{scheme}://{host}:{port}"),
            None => format!("{scheme}://{host}"),
        };
        out.push_str(path_and_query);
        if let Some(frag) = fragment {
            out.push('#');
            out.push_str(frag);
        }
        out
    }

    /// Parses an absolute `http`/`https` URL string.
    ///
    /// The fragment and userinfo are split off in Rust; the remainder is
    /// handed to `abi::winhttp_crack_url`.
    ///
    /// # Errors
    /// * `ParseError::InvalidUrl` when the cracker rejects the input.
    /// * `ParseError::UnsupportedScheme` when the scheme is not `http`/`https`.
    pub(crate) fn parse_impl(url: &str) -> Result<Self, ParseError> {
        // Everything after the first `#` is the fragment; an empty one counts as absent.
        let (before_fragment, fragment) = match url.split_once('#') {
            Some((head, "")) => (head, None),
            Some((head, frag)) => (head, Some(frag.to_owned())),
            None => (url, None),
        };
        let (crack_input, username, password) = extract_userinfo(before_fragment);
        let cracked =
            abi::winhttp_crack_url(crack_input.as_ref()).map_err(|_| ParseError::InvalidUrl)?;

        let scheme = cracked.scheme.to_ascii_lowercase();
        let is_https = match scheme.as_str() {
            "https" => true,
            "http" => false,
            _ => return Err(ParseError::UnsupportedScheme),
        };

        let host = cracked.host;
        let port = cracked.port;
        // A port equal to the scheme default is never serialized.
        let explicit_port = port != if is_https { 443 } else { 80 };

        let path = if cracked.path.is_empty() {
            "/".to_owned()
        } else {
            cracked.path
        };
        let extra = cracked.extra;
        let query = extract_query_from_extra(&extra);
        // Appending an empty `extra` leaves just the path.
        let path_and_query = format!("{path}{extra}");

        let serialized = Self::compose_serialized(
            &scheme,
            &host,
            explicit_port.then_some(port),
            &path_and_query,
            fragment.as_deref(),
        );
        Ok(Url {
            serialized,
            scheme,
            host,
            port,
            explicit_port,
            path,
            query,
            fragment,
            is_https,
            path_and_query,
            username,
            password,
        })
    }

    /// Replaces the query string and rebuilds `path_and_query` and the
    /// serialized form; the fragment is kept.
    #[cfg_attr(all(not(feature = "query"), not(test)), expect(dead_code))]
    pub(crate) fn set_query_string(&mut self, query: String) {
        let path_and_query = format!("{}?{query}", self.path);
        self.serialized = Self::compose_serialized(
            &self.scheme,
            &self.host,
            self.explicit_port.then_some(self.port),
            &path_and_query,
            self.fragment.as_deref(),
        );
        self.path_and_query = path_and_query;
        self.query = Some(query);
    }

    /// Builds a `Url` from an absolute `http::Uri`.
    ///
    /// The result never has a fragment. Userinfo found in the authority is
    /// percent-decoded into `username` / `password`.
    ///
    /// # Errors
    /// * `ParseError::RelativeUrlWithoutBase` when the URI has no scheme.
    /// * `ParseError::UnsupportedScheme` when the scheme is not `http`/`https`.
    /// * `ParseError::EmptyHost` when the authority is missing or its host is empty.
    pub(crate) fn from_http_uri(uri: &http::Uri) -> Result<Self, ParseError> {
        let Some(raw_scheme) = uri.scheme_str() else {
            return Err(ParseError::RelativeUrlWithoutBase);
        };
        let scheme = raw_scheme.to_ascii_lowercase();
        let is_https = match scheme.as_str() {
            "https" => true,
            "http" => false,
            _ => return Err(ParseError::UnsupportedScheme),
        };

        let authority = uri.authority().ok_or(ParseError::EmptyHost)?;
        if authority.host().is_empty() {
            return Err(ParseError::EmptyHost);
        }
        let host = authority.host().to_owned();

        let default_port: u16 = if is_https { 443 } else { 80 };
        let port = authority.port_u16().unwrap_or(default_port);
        let explicit_port = port != default_port;

        let (path, query) = uri.path_and_query().map_or_else(
            || ("/".to_owned(), None),
            |pq| {
                let path = match pq.path() {
                    "" => "/".to_owned(),
                    non_empty => non_empty.to_owned(),
                };
                (path, pq.query().map(str::to_owned))
            },
        );
        let path_and_query = match query.as_deref() {
            Some(q) => format!("{path}?{q}"),
            None => path.clone(),
        };

        // Userinfo is whatever precedes the last `@` of the authority.
        let (username, password) = authority.as_str().rsplit_once('@').map_or_else(
            || (String::new(), None),
            |(userinfo, _)| match userinfo.split_once(':') {
                Some((user, pass)) => (percent_decode(user), Some(percent_decode(pass))),
                None => (percent_decode(userinfo), None),
            },
        );

        let serialized = Self::compose_serialized(
            &scheme,
            &host,
            explicit_port.then_some(port),
            &path_and_query,
            None,
        );
        Ok(Url {
            serialized,
            scheme,
            host,
            port,
            explicit_port,
            path,
            query,
            fragment: None,
            is_https,
            path_and_query,
            username,
            password,
        })
    }

    /// Converts this URL into an `http::Uri` built from the scheme, the
    /// `host[:port]` authority and `path_and_query`; credentials and the
    /// fragment are not included.
    ///
    /// # Errors
    /// Returns the `http::Error` reported by the URI builder.
    pub(crate) fn to_http_uri(&self) -> Result<http::Uri, http::Error> {
        let authority = match self.port() {
            Some(port) => format!("{}:{port}", self.host),
            None => self.host.clone(),
        };
        http::Uri::builder()
            .scheme(self.scheme.as_str())
            .authority(authority.as_str())
            .path_and_query(self.path_and_query.as_str())
            .build()
    }
}
/// Splits the userinfo (`user[:password]@`) out of an absolute URL.
///
/// Returns `(url_without_userinfo, username, password)`. The username and
/// password are percent-decoded. When the URL has no `://` or its authority
/// contains no `@`, the input is returned borrowed and unchanged with an empty
/// username and no password.
///
/// The authority ends at the first `/`, `?` or `#` after `://`. An `@` that
/// appears later (in the path, query or fragment) is ordinary data: it is
/// never treated as a userinfo delimiter and is preserved in the returned URL.
fn extract_userinfo(url: &str) -> (std::borrow::Cow<'_, str>, String, Option<String>) {
    use std::borrow::Cow;

    let Some((scheme, after_scheme)) = url.split_once("://") else {
        return (Cow::Borrowed(url), String::new(), None);
    };

    let authority_end = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let (authority, tail) = after_scheme.split_at(authority_end);

    // The last `@` inside the authority separates userinfo from host[:port].
    let Some((userinfo, host_port)) = authority.rsplit_once('@') else {
        return (Cow::Borrowed(url), String::new(), None);
    };

    let (raw_user, raw_pass) = match userinfo.split_once(':') {
        Some((user, pass)) => (user, Some(pass)),
        None => (userinfo, None),
    };

    let cleaned = format!("{scheme}://{host_port}{tail}");
    (
        Cow::Owned(cleaned),
        percent_decode(raw_user),
        raw_pass.map(percent_decode),
    )
}

/// Decodes `%XX` escapes in `input`.
///
/// A `%` that is not followed by two hex digits is copied through unchanged.
/// If the decoded bytes are not valid UTF-8, invalid sequences are replaced
/// with U+FFFD.
fn percent_decode(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        // A complete escape needs three bytes: `%` plus two hex digits.
        let escape = match bytes.get(i..i + 3) {
            Some(&[b'%', hi, lo]) => hex_nibble(hi).zip(hex_nibble(lo)),
            _ => None,
        };
        match escape {
            Some((hi, lo)) => {
                decoded.push(hi << 4 | lo);
                i += 3;
            }
            None => {
                decoded.push(bytes[i]);
                i += 1;
            }
        }
    }
    String::from_utf8(decoded)
        .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
}

/// Returns the value (0–15) of an ASCII hex digit, or `None` for any other byte.
fn hex_nibble(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'A'..=b'F' => Some(b - b'A' + 10),
        b'a'..=b'f' => Some(b - b'a' + 10),
        _ => None,
    }
}
/// Collapses `.` and `..` segments of a `/`-separated path.
///
/// `.` segments are dropped and `..` removes the most recently kept segment
/// (going above the root is ignored). The result always starts with `/`, and
/// a path that ended in `/.` or `/..` yields a result ending in `/`.
fn remove_dot_segments(path: &str) -> String {
    let kept = path.split('/').fold(Vec::<&str>::new(), |mut stack, segment| {
        match segment {
            "." => {}
            ".." => {
                stack.pop();
            }
            other => stack.push(other),
        }
        stack
    });

    let joined = kept.join("/");
    let mut normalized = if joined.starts_with('/') {
        joined
    } else {
        format!("/{joined}")
    };

    // A trailing dot segment refers to a directory, so keep the trailing slash.
    let ends_in_dot_segment = path.ends_with("/.") || path.ends_with("/..");
    if ends_in_dot_segment && !normalized.ends_with('/') {
        normalized.push('/');
    }
    normalized
}
/// Splits a URL reference into `(path, query, fragment)`.
///
/// The fragment is everything after the first `#`; the query is everything
/// between the first `?` and the fragment. Empty queries and fragments are
/// reported as `None`.
fn split_reference(input: &str) -> (&str, Option<&str>, Option<&str>) {
    // Treats an empty component the same as a missing one.
    fn non_empty(part: &str) -> Option<&str> {
        if part.is_empty() {
            None
        } else {
            Some(part)
        }
    }

    let (head, fragment) = input
        .split_once('#')
        .map_or((input, None), |(head, frag)| (head, non_empty(frag)));
    let (path, query) = head
        .split_once('?')
        .map_or((head, None), |(path, query)| (path, non_empty(query)));
    (path, query, fragment)
}
/// Returns the query contained in `extra`: the text after a leading `?`.
///
/// Yields `None` when `extra` does not start with `?` or nothing follows it.
fn extract_query_from_extra(extra: &str) -> Option<String> {
    extra
        .strip_prefix('?')
        .filter(|query| !query.is_empty())
        .map(str::to_owned)
}
#[cfg(feature = "json")]
impl serde::Serialize for Url {
    /// Serializes the URL as a single string: its serialized form.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text: &str = &self.serialized;
        serializer.serialize_str(text)
    }
}
#[cfg(feature = "json")]
impl<'de> serde::Deserialize<'de> for Url {
    /// Deserializes a string and parses it with `Url::parse`; a parse failure
    /// is reported as a custom deserialization error carrying its message.
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let raw = <String as serde::Deserialize>::deserialize(deserializer)?;
        match Url::parse(&raw) {
            Ok(url) => Ok(url),
            Err(err) => Err(serde::de::Error::custom(err.to_string())),
        }
    }
}
// Unit tests for `Url`, `ParseError` and the private parsing helpers.
#[cfg(test)]
mod tests {
use super::*;
// One parse expectation: (input, host, port, path, query, fragment).
type ParseCase =
(&'static str, &'static str, u16, &'static str, Option<&'static str>, Option<&'static str>);
// Inputs covering default/explicit ports, queries, fragments and percent-escapes.
const PARSE_CASES: &[ParseCase] = &[
("https://example.com/api/v1?id=42", "example.com", 443, "/api/v1", Some("id=42"), None),
("http://localhost:8080/test", "localhost", 8080, "/test", None, None),
("https://example.com", "example.com", 443, "/", None, None),
("http://example.com", "example.com", 80, "/", None, None),
(
"https://example.com/path/to/resource",
"example.com",
443,
"/path/to/resource",
None,
None,
),
("https://example.com:9443/secure", "example.com", 9443, "/secure", None, None),
("https://example.com/page#section", "example.com", 443, "/page", None, Some("section")),
(
"https://example.com/api?key=val&a=b#frag",
"example.com",
443,
"/api",
Some("key=val&a=b"),
Some("frag"),
),
(
"http://tlu.dl.delivery.mp.microsoft.com/files/abc123?P1=123&P2=404&P3=2&P4=cLS1G9%2btest%2fvalue%3d%3d",
"tlu.dl.delivery.mp.microsoft.com",
80,
"/files/abc123",
Some("P1=123&P2=404&P3=2&P4=cLS1G9%2btest%2fvalue%3d%3d"),
None,
),
("https://example.com/%25?%25#%25", "example.com", 443, "/%25", Some("%25"), Some("%25")),
(
"https://example.com/a%2Fb?x=%3D#y%23z",
"example.com",
443,
"/a%2Fb",
Some("x=%3D"),
Some("y%23z"),
),
];
// Every PARSE_CASES entry parses to the expected components, and parsing never
// introduces a `%25` that was not in the input (no double-encoding).
#[test]
fn parse_urls() {
for &(input, host, port, path, query, fragment) in PARSE_CASES {
let parsed = input.into_url().unwrap_or_else(|e| panic!("{input}: {e}"));
assert_eq!(parsed.host, host, "{input}: host");
assert_eq!(parsed.port, port, "{input}: port");
assert_eq!(parsed.path(), path, "{input}: path");
assert_eq!(parsed.query(), query, "{input}: query");
assert_eq!(parsed.fragment(), fragment, "{input}: fragment");
assert!(
!parsed.as_str().contains("%25") || input.contains("%25"),
"{input}: spurious %25 in as_str(): {}",
parsed.as_str()
);
}
}
// `into_url` works for `&str`, `String` and `&String`.
#[test]
fn into_url_for_string_types() {
let s = String::from("https://example.com/test");
let a = "https://example.com/test".into_url().unwrap();
let b = s.clone().into_url().unwrap();
let c = (&s).into_url().unwrap();
for url in [&a, &b, &c] {
assert_eq!(url.host, "example.com", "host mismatch for {}", url.as_str());
}
}
// All public accessors on a URL with a non-default port, query and fragment.
#[test]
fn url_accessors() {
let url = "https://example.com:9443/api/v1?key=val#sect"
.into_url()
.unwrap();
assert_eq!(url.as_str(), "https://example.com:9443/api/v1?key=val#sect");
assert_eq!(url.scheme(), "https");
assert_eq!(url.host_str(), Some("example.com"));
assert_eq!(url.port(), Some(9443));
assert_eq!(url.port_or_known_default(), Some(9443));
assert_eq!(url.path(), "/api/v1");
assert_eq!(url.query(), Some("key=val"));
assert_eq!(url.fragment(), Some("sect"));
}
// `port()` hides the https default port; `port_or_known_default()` reports it.
#[test]
fn url_default_port_returns_none() {
let url = "https://example.com/path".into_url().unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_known_default(), Some(443));
}
// Same as above for the http default port.
#[test]
fn url_http_default_port() {
let url = "http://example.com/path".into_url().unwrap();
assert_eq!(url.port(), None);
assert_eq!(url.port_or_known_default(), Some(80));
}
// `Display` prints the serialized URL.
#[test]
fn url_display() {
let url = "https://example.com/path".into_url().unwrap();
assert_eq!(format!("{url}"), "https://example.com/path");
}
// `Debug` prints a struct with the individual components.
#[test]
fn url_debug() {
let url = "https://example.com/path".into_url().unwrap();
let debug = format!("{url:?}");
assert!(debug.starts_with("Url { "), "expected struct debug: {debug}");
assert!(debug.contains("scheme: \"https\""), "scheme: {debug}");
assert!(debug.contains("host: Some(Domain(\"example.com\"))"), "host: {debug}");
assert!(debug.contains("path: \"/path\""), "path: {debug}");
assert!(debug.contains("port: None"), "default port should be None: {debug}");
}
// A clone compares equal to its source.
#[test]
fn url_clone_eq() {
let a = "https://example.com/path".into_url().unwrap();
let b = a.clone();
assert_eq!(a, b);
}
// Two URLs parsed from the same text hash identically.
#[test]
fn url_hash_consistency() {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let u1 = "https://example.com/path".into_url().unwrap();
let u2 = "https://example.com/path".into_url().unwrap();
let mut h1 = DefaultHasher::new();
let mut h2 = DefaultHasher::new();
u1.hash(&mut h1);
u2.hash(&mut h2);
assert_eq!(h1.finish(), h2.finish());
}
// `into_url` on a `Url` returns it unchanged.
#[test]
fn into_url_for_url_type() {
let url = "https://example.com/test".into_url().unwrap();
let url2 = url.into_url().unwrap();
assert_eq!(url2.as_str(), "https://example.com/test");
assert!(url2.is_https);
assert_eq!(url2.path_and_query, "/test");
}
// Path/query/fragment splitting, including empty `?` and `#` which map to `None`.
#[test]
fn url_path_query_fragment_combinations() {
let cases: &[(&str, &str, Option<&str>, Option<&str>)] = &[
("https://example.com/page#section", "/page", None, Some("section")),
("https://example.com/search?q=test", "/search", Some("q=test"), None),
("https://example.com/path", "/path", None, None),
("https://example.com/#frag", "/", None, Some("frag")),
("https://example.com/?q=1#sect", "/", Some("q=1"), Some("sect")),
("https://example.com/page#", "/page", None, None),
("https://example.com/page?", "/page", None, None),
("https://example.com/page?#", "/page", None, None),
("https://example.com/path?q=1#", "/path", Some("q=1"), None),
("https://example.com/path?#frag", "/path", None, Some("frag")),
];
for &(input, path, query, fragment) in cases {
let url = Url::parse(input).unwrap();
assert_eq!(url.path(), path, "{input}: path");
assert_eq!(url.query(), query, "{input}: query");
assert_eq!(url.fragment(), fragment, "{input}: fragment");
}
}
// `AsRef<str>` exposes the serialized URL.
#[test]
fn url_as_ref_str() {
let url: Url = "https://example.com/path".into_url().unwrap();
let s: &str = url.as_ref();
assert_eq!(s, "https://example.com/path");
}
// `FromStr` parses a valid URL.
#[test]
fn url_from_str() {
use std::str::FromStr;
let url = Url::from_str("https://example.com/api").unwrap();
assert_eq!(url.as_str(), "https://example.com/api");
}
// `FromStr` rejects text that is not a URL.
#[test]
fn url_from_str_invalid() {
use std::str::FromStr;
let err = Url::from_str("not a url");
assert!(err.is_err());
}
// Ordering follows the serialized text; sorting a vector works.
#[test]
fn url_ordering() {
let a: Url = "https://aaa.com".into_url().unwrap();
let b: Url = "https://bbb.com".into_url().unwrap();
assert!(a < b);
assert!(b > a);
assert_eq!(a.cmp(&a), std::cmp::Ordering::Equal);
assert_eq!(a.partial_cmp(&b), Some(std::cmp::Ordering::Less));
let mut urls: Vec<Url> = vec![
"https://zzz.com".into_url().unwrap(),
"https://aaa.com".into_url().unwrap(),
"https://mmm.com".into_url().unwrap(),
];
urls.sort();
assert_eq!(urls[0].as_str(), "https://aaa.com/");
assert_eq!(urls[2].as_str(), "https://zzz.com/");
}
// Reference-resolution expectations: (base, reference, expected result, label).
const JOIN_CASES: &[(&str, &str, &str, &str)] = &[
(
"https://example.com/api/v1",
"https://other.com/new",
"https://other.com/new",
"absolute url",
),
(
"https://example.com/api/v1",
"//other.com/path",
"https://other.com/path",
"scheme-relative",
),
(
"http://example.com/a",
"//cdn.example.com/js/app.js",
"http://cdn.example.com/js/app.js",
"scheme-relative preserves http",
),
(
"https://example.com/api/v1",
"/new/path",
"https://example.com/new/path",
"absolute path",
),
(
"https://example.com/api/v1",
"v2",
"https://example.com/api/v2",
"relative path (sibling)",
),
("https://example.com/a/b/c", "./d", "https://example.com/a/b/d", "dot-segment ./"),
("https://example.com/a/b/c", "../d", "https://example.com/a/d", "dot-segment ../"),
("https://example.com/a/b/c/d", "../../e", "https://example.com/a/e", "dot-segment ../../"),
("https://example.com/a", "../../b", "https://example.com/b", "dot-segment past root"),
(
"https://example.com/old/path",
"/a/b/../c",
"https://example.com/a/c",
"dot-segment in absolute path",
),
("https://example.com/a/b/c", ".", "https://example.com/a/b/", "trailing dot"),
("https://example.com/a/b/c", "..", "https://example.com/a/", "trailing dotdot"),
(
"https://example.com/a/b?q=1#f",
"",
"https://example.com/a/b?q=1#f",
"empty input returns base",
),
(
"https://example.com/a/b",
"?q=1",
"https://example.com/a/b?q=1",
"query-only preserves path",
),
(
"https://example.com/a/b?old=1",
"?new=2",
"https://example.com/a/b?new=2",
"query-only replaces query",
),
(
"https://example.com/a/b",
"?q=1#sec",
"https://example.com/a/b?q=1#sec",
"query with fragment",
),
(
"https://example.com/a/b?q=1",
"#sec2",
"https://example.com/a/b?q=1#sec2",
"fragment-only preserves path+query",
),
(
"https://example.com/a/b#old",
"#new",
"https://example.com/a/b#new",
"fragment-only replaces fragment",
),
(
"https://example.com/a/b",
"c?q=1#f",
"https://example.com/a/c?q=1#f",
"relative path with query+fragment",
),
(
"https://example.com/a/b",
"/x/y?q=1",
"https://example.com/x/y?q=1",
"absolute path with query",
),
];
// Every JOIN_CASES entry resolves to the expected serialized URL.
#[test]
fn url_join() {
for &(base_str, reference, expected, label) in JOIN_CASES {
let base = Url::parse(base_str).unwrap();
let joined = base
.join(reference)
.unwrap_or_else(|e| panic!("{label}: join({base_str:?}, {reference:?}): {e}"));
assert_eq!(joined.as_str(), expected, "{label}: join({base_str:?}, {reference:?})",);
}
}
// A non-default base port survives path and query joins; `//host` replaces the host.
#[test]
fn url_join_preserves_custom_port() {
let base = Url::parse("https://example.com:9443/api").unwrap();
let joined = base.join("/other").unwrap();
assert_eq!(joined.port(), Some(9443));
assert_eq!(joined.path(), "/other");
let joined2 = base.join("//other.com/path").unwrap();
assert_eq!(joined2.host_str(), Some("other.com"));
let joined3 = base.join("?q=1").unwrap();
assert_eq!(joined3.port(), Some(9443));
assert_eq!(joined3.query(), Some("q=1"));
}
// Userinfo is extracted and percent-decoded; invalid escapes are kept verbatim.
#[test]
fn url_username_password() {
let cases: &[(&str, &str, Option<&str>)] = &[
("https://example.com", "", None),
("https://alice:s3cret@example.com/path", "alice", Some("s3cret")),
("http://bob@example.com", "bob", None),
("https://user%40domain:p%3Ass@example.com/", "user@domain", Some("p:ss")),
("https://user:@example.com/", "user", Some("")),
("https://user%41%62:p%4Fss@example.com/", "userAb", Some("pOss")),
("https://%5A%6a@example.com/", "Zj", None),
("http://user%GG:pass@example.com/path", "user%GG", Some("pass")),
];
for &(input, username, password) in cases {
let url = Url::parse(input).unwrap();
assert_eq!(url.username(), username, "{input}: username");
assert_eq!(url.password(), password, "{input}: password");
}
}
// Credentials never appear in the serialized URL.
#[test]
fn url_userinfo_stripped_from_serialization() {
let url = Url::parse("https://alice:s3cret@example.com/path").unwrap();
assert!(!url.as_str().contains("alice"));
assert!(!url.as_str().contains("s3cret"));
assert_eq!(url.host_str(), Some("example.com"));
assert_eq!(url.path(), "/path");
}
// Direct checks of `extract_userinfo`: (input, cleaned URL, username, password).
#[test]
fn extract_userinfo_table() {
let cases: &[(&str, &str, &str, Option<&str>)] = &[
("https://example.com/path", "https://example.com/path", "", None),
("https://alice:pw@host:8080/path", "https://host:8080/path", "alice", Some("pw")),
("http://user@host", "http://host", "user", None),
("https://user%40name:pw@host/p", "https://host/p", "user@name", Some("pw")),
];
for &(input, cleaned, user, pass) in cases {
let (actual_cleaned, actual_user, actual_pass) = extract_userinfo(input);
assert_eq!(actual_cleaned.as_ref(), cleaned, "{input}: cleaned");
assert_eq!(actual_user, user, "{input}: username");
assert_eq!(actual_pass.as_deref(), pass, "{input}: password");
}
}
// A URL serializes to a JSON string.
#[test]
#[cfg(feature = "json")]
fn url_serialize() {
let url = Url::parse("https://example.com/path?q=1").unwrap();
let json = serde_json::to_string(&url).unwrap();
assert_eq!(json, "\"https://example.com/path?q=1\"");
}
// A JSON string deserializes to a URL.
#[test]
#[cfg(feature = "json")]
fn url_deserialize() {
let url: Url = serde_json::from_str("\"https://example.com/path\"").unwrap();
assert_eq!(url.as_str(), "https://example.com/path");
}
// Serializing then deserializing yields an equal URL.
#[test]
#[cfg(feature = "json")]
fn url_roundtrip() {
let original = Url::parse("https://example.com/api?key=val#frag").unwrap();
let json = serde_json::to_string(&original).unwrap();
let deserialized: Url = serde_json::from_str(&json).unwrap();
assert_eq!(original, deserialized);
}
// Deserializing a string that is not a URL fails.
#[test]
#[cfg(feature = "json")]
fn url_deserialize_invalid() {
let result: Result<Url, _> = serde_json::from_str("\"not a valid url\"");
assert!(result.is_err());
}
// `set_query_string` cases: (input, new query, expected query(), expected as_str(),
// expected path_and_query, label).
#[test]
fn set_query_string_table() {
let cases: &[(&str, &str, &str, &str, &str, &str)] = &[
(
"https://example.com/api",
"key=val",
"key=val",
"https://example.com/api?key=val",
"/api?key=val",
"adds query",
),
(
"https://example.com:9443/api#frag",
"a=1&b=2",
"a=1&b=2",
"https://example.com:9443/api?a=1&b=2#frag",
"/api?a=1&b=2",
"with port and fragment",
),
(
"https://example.com/api?old=1",
"new=2",
"new=2",
"https://example.com/api?new=2",
"/api?new=2",
"replaces existing",
),
];
for &(input, query, exp_query, exp_str, exp_pq, label) in cases {
let mut url = Url::parse(input).unwrap();
url.set_query_string(query.to_owned());
assert_eq!(url.query(), Some(exp_query), "{label}: query()");
assert_eq!(url.as_str(), exp_str, "{label}: as_str()");
assert_eq!(url.path_and_query, exp_pq, "{label}: path_and_query");
}
}
// Inputs that must fail to parse: (input, expected variant, expected Display text, label).
const PARSE_ERROR_TABLE: &[(&str, ParseError, &str, &str)] = &[
(
"ftp://example.com/file",
ParseError::UnsupportedScheme,
"unsupported URL scheme",
"unsupported scheme",
),
("not a url", ParseError::InvalidUrl, "invalid URL", "invalid url (catch-all)"),
];
// Both `Url::parse` and `FromStr` report the expected error for each table entry.
#[test]
fn parse_error_table() {
for (input, expected, display, label) in PARSE_ERROR_TABLE {
let err = Url::parse(input).unwrap_err();
assert_eq!(&err, expected, "{label}: variant");
assert_eq!(err.to_string(), *display, "{label}: Display");
let err2: ParseError = input.parse::<Url>().unwrap_err();
assert_eq!(&err2, expected, "{label}: FromStr variant");
}
}
// Pins the exact `Display` text of every `ParseError` variant.
#[test]
fn parse_error_display_parity() {
let cases: &[(ParseError, &str)] = &[
(ParseError::EmptyHost, "empty host"),
(ParseError::IdnaError, "invalid international domain name"),
(ParseError::InvalidPort, "invalid port number"),
(ParseError::InvalidIpv4Address, "invalid IPv4 address"),
(ParseError::InvalidIpv6Address, "invalid IPv6 address"),
(ParseError::InvalidDomainCharacter, "invalid domain character"),
(ParseError::RelativeUrlWithoutBase, "relative URL without a base"),
(
ParseError::RelativeUrlWithCannotBeABaseBase,
"relative URL with a cannot-be-a-base base",
),
(
ParseError::SetHostOnCannotBeABaseUrl,
"a cannot-be-a-base URL doesn't have a host to set",
),
(ParseError::Overflow, "URLs more than 4 GB are not supported"),
(ParseError::InvalidUrl, "invalid URL"),
(ParseError::UnsupportedScheme, "unsupported URL scheme"),
];
for (variant, expected) in cases {
assert_eq!(variant.to_string(), *expected, "{variant:?}");
}
}
// `ParseError` implements `std::error::Error`, `Clone` and `Debug`.
#[test]
fn parse_error_traits() {
fn assert_std_error<T: std::error::Error>() {}
assert_std_error::<ParseError>();
let err = ParseError::UnsupportedScheme;
let cloned = err.clone();
assert_eq!(format!("{err:?}"), format!("{cloned:?}"));
}
// The constant-valued accessors return their fixed answers.
#[test]
fn url_trivial_accessors() {
let url = Url::parse("https://example.com").unwrap();
assert!(url.has_host());
assert!(url.has_authority());
assert!(!url.cannot_be_a_base());
}
// `domain()` returns the host for names and `None` for IPv4/IPv6 literals.
#[test]
fn url_domain_table() {
let cases: &[(&str, Option<&str>)] = &[
("https://example.com/path", Some("example.com")),
("https://sub.example.co.uk", Some("sub.example.co.uk")),
("https://127.0.0.1/path", None),
("https://[::1]/path", None),
("https://0.0.0.0", None),
];
for &(input, expected) in cases {
let url = Url::parse(input).unwrap();
assert_eq!(url.domain(), expected, "domain() for {input}");
}
}
// `path_segments()` splits on `/` after dropping the leading slash.
#[test]
fn url_path_segments_table() {
let cases: &[(&str, &[&str])] = &[
("https://example.com/a/b/c", &["a", "b", "c"]),
("https://example.com/", &[""]),
("https://example.com/one", &["one"]),
("https://example.com/a/b/", &["a", "b", ""]),
];
for &(input, expected) in cases {
let url = Url::parse(input).unwrap();
let segs: Vec<&str> = url.path_segments().unwrap().collect();
assert_eq!(segs, expected, "path_segments() for {input}");
}
}
// `String::from(url)` yields the serialized URL.
#[test]
fn url_into_string() {
let url = Url::parse("https://example.com/path?q=1").unwrap();
let s: String = url.into();
assert_eq!(s, "https://example.com/path?q=1");
}
// `from_http_uri` / `to_http_uri` conversions, success and error cases.
#[test]
fn http_uri_conversion() {
// (label, input, (scheme, host, port, explicit port), (path, query),
// (username, password), substring expected in as_str()).
type TestCase<'a> = (
&'a str,
&'a str,
(&'a str, &'a str, u16, Option<u16>),
(&'a str, Option<&'a str>),
(&'a str, Option<&'a str>),
&'a str,
);
let ok_cases: &[TestCase<'_>] = &[
(
"basic https",
"https://example.com/search?q=rust",
("https", "example.com", 443, None),
("/search", Some("q=rust")),
("", None),
"https://example.com/search?q=rust",
),
(
"http default port",
"http://example.com/index",
("http", "example.com", 80, None),
("/index", None),
("", None),
"http://example.com/index",
),
(
"explicit port",
"https://example.com:8443/p",
("https", "example.com", 8443, Some(8443)),
("/p", None),
("", None),
":8443",
),
(
"userinfo with password",
"https://user:pass@example.com/x",
("https", "example.com", 443, None),
("/x", None),
("user", Some("pass")),
"https://example.com/x",
),
(
"userinfo without password",
"https://alice@example.com/y",
("https", "example.com", 443, None),
("/y", None),
("alice", None),
"https://example.com/y",
),
(
"port + query roundtrip",
"https://example.com:4433/api?v=2",
("https", "example.com", 4433, Some(4433)),
("/api", Some("v=2")),
("", None),
":4433",
),
(
"http default port explicit",
"http://example.com:80/path",
("http", "example.com", 80, None),
("/path", None),
("", None),
"http://example.com/path",
),
(
"https default port explicit",
"https://example.com:443/path",
("https", "example.com", 443, None),
("/path", None),
("", None),
"https://example.com/path",
),
];
for &(
label,
input,
(scheme, host, port, explicit_port),
(path, query),
(user, pass),
contains,
) in ok_cases
{
let uri: http::Uri = input
.parse()
.unwrap_or_else(|e| panic!("{label}: parse URI: {e}"));
let url =
Url::from_http_uri(&uri).unwrap_or_else(|e| panic!("{label}: from_http_uri: {e}"));
assert_eq!(url.scheme(), scheme, "{label}: scheme");
assert_eq!(url.host_str(), Some(host), "{label}: host");
assert_eq!(url.port_or_known_default(), Some(port), "{label}: port");
assert_eq!(url.port(), explicit_port, "{label}: explicit_port");
assert_eq!(url.path(), path, "{label}: path");
assert_eq!(url.query(), query, "{label}: query");
assert_eq!(url.username(), user, "{label}: username");
assert_eq!(url.password(), pass, "{label}: password");
assert!(url.as_str().contains(contains), "{label}: serialized contains {contains:?}");
// Converting back must preserve the scheme and the path+query.
let back = url
.to_http_uri()
.unwrap_or_else(|e| panic!("{label}: to_http_uri: {e}"));
assert_eq!(back.scheme_str(), uri.scheme_str(), "{label}: roundtrip scheme");
assert_eq!(
back.path_and_query().map(|pq| pq.as_str()),
uri.path_and_query().map(|pq| pq.as_str()),
"{label}: roundtrip path_and_query"
);
}
let err_cases: &[(&str, http::Uri, ParseError)] = &[
(
"unsupported scheme",
"ftp://example.com/file".parse().unwrap(),
ParseError::UnsupportedScheme,
),
("no scheme", http::Uri::from_static("/relative"), ParseError::RelativeUrlWithoutBase),
("empty host", http::Uri::from_static("http://:8080/path"), ParseError::EmptyHost),
];
for (label, uri, expected) in err_cases {
let err = Url::from_http_uri(uri).unwrap_err();
assert_eq!(err, *expected, "{label}");
}
}
}