use core::fmt;
/// Errors reported while parsing, validating, or percent-decoding a URI.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UriError {
/// The input string was empty.
Empty,
/// A `%` was not followed by two hexadecimal digits.
InvalidPercentEncoding,
/// The port text could not be parsed as a `u16`.
InvalidPort,
/// An invalid character, carried as the payload.
/// NOTE(review): no code in the visible source constructs this variant.
InvalidCharacter(char),
/// The scheme does not start with an ASCII letter or contains a byte other
/// than ASCII alphanumerics, `+`, `-`, or `.`.
InvalidScheme,
/// The host is not an acceptable IP literal or registered name.
InvalidHost,
/// Percent-decoded bytes were not valid UTF-8.
InvalidUtf8,
/// The path contains a disallowed byte (the payload is that byte).
InvalidPathCharacter(u8),
/// The query contains a disallowed byte (the payload is that byte).
InvalidQueryCharacter(u8),
/// The fragment contains a disallowed byte (the payload is that byte).
InvalidFragmentCharacter(u8),
/// The userinfo contains a disallowed byte or a malformed percent-triplet.
InvalidUserinfo,
}
/// Human-readable messages for each [`UriError`] variant.
impl fmt::Display for UriError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants that carry a payload format it and return immediately; all
        // remaining variants map to a fixed message written in one place.
        let message = match self {
            UriError::InvalidCharacter(ch) => {
                return write!(f, "invalid character: {:?}", ch);
            }
            UriError::InvalidPathCharacter(byte) => {
                return write!(f, "invalid path character: 0x{:02X}", byte);
            }
            UriError::InvalidQueryCharacter(byte) => {
                return write!(f, "invalid query character: 0x{:02X}", byte);
            }
            UriError::InvalidFragmentCharacter(byte) => {
                return write!(f, "invalid fragment character: 0x{:02X}", byte);
            }
            UriError::Empty => "empty URI",
            UriError::InvalidPercentEncoding => "invalid percent encoding",
            UriError::InvalidPort => "invalid port",
            UriError::InvalidScheme => "invalid scheme",
            UriError::InvalidHost => "invalid host",
            UriError::InvalidUtf8 => "invalid UTF-8 sequence",
            UriError::InvalidUserinfo => "invalid userinfo",
        };
        f.write_str(message)
    }
}
// Empty impl: `UriError` relies entirely on the default `Error` trait methods.
impl std::error::Error for UriError {}
/// Returns `true` for ASCII letters, ASCII digits, and the four marks
/// `-`, `.`, `_`, `~`.
fn is_unreserved(c: u8) -> bool {
    matches!(
        c,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
/// Returns `true` when `b` is one of the eleven bytes `! $ & ' ( ) * + , ; =`.
fn is_sub_delim(b: u8) -> bool {
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";
    SUB_DELIMS.contains(&b)
}
/// Returns `true` when `b` may appear literally in a path segment: `:`, `@`,
/// an unreserved byte, or a sub-delimiter. `%` is not included here.
fn is_pchar(b: u8) -> bool {
    matches!(b, b':' | b'@') || is_unreserved(b) || is_sub_delim(b)
}
/// Returns `true` when `b` may appear literally in a query or fragment:
/// everything `is_pchar` accepts, plus `/` and `?`.
fn is_query_or_fragment_char(b: u8) -> bool {
    matches!(b, b'/' | b'?') || is_pchar(b)
}
/// Checks that every `%` in `s` is followed by exactly two hexadecimal digits.
///
/// # Errors
/// Returns `UriError::InvalidPercentEncoding` for a truncated or non-hex triplet.
fn validate_percent_encoding(s: &str) -> Result<(), UriError> {
    let raw = s.as_bytes();
    let mut idx = 0;
    while let Some(&byte) = raw.get(idx) {
        if byte != b'%' {
            idx += 1;
            continue;
        }
        // `get` yields `None` when fewer than two bytes follow the `%`.
        match raw.get(idx + 1..idx + 3) {
            Some(&[hi, lo]) if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => idx += 3,
            _ => return Err(UriError::InvalidPercentEncoding),
        }
    }
    Ok(())
}
/// Validates a path component: well-formed percent-triplets, and otherwise
/// only `/` and bytes accepted by `is_pchar`.
///
/// # Errors
/// `UriError::InvalidPercentEncoding` for a malformed triplet anywhere in the
/// path, otherwise `UriError::InvalidPathCharacter` carrying the first
/// disallowed byte.
fn validate_path(path: &str) -> Result<(), UriError> {
    validate_percent_encoding(path)?;
    // Once the triplets are known to be well formed, the two bytes after each
    // `%` are hex digits (themselves allowed), so a plain byte-wise scan finds
    // the same first offender as skipping whole triplets.
    match path
        .bytes()
        .find(|&b| !(b == b'%' || b == b'/' || is_pchar(b)))
    {
        Some(bad) => Err(UriError::InvalidPathCharacter(bad)),
        None => Ok(()),
    }
}
/// Validates a query component (the text after `?`, without the `?`).
///
/// # Errors
/// `UriError::InvalidPercentEncoding` for a malformed triplet anywhere in the
/// query, otherwise `UriError::InvalidQueryCharacter` carrying the first
/// disallowed byte.
fn validate_query(query: &str) -> Result<(), UriError> {
    validate_percent_encoding(query)?;
    // Triplets are already verified; their hex digits are allowed bytes, so a
    // byte-wise scan that tolerates `%` is sufficient.
    let offending = query
        .bytes()
        .find(|&b| b != b'%' && !is_query_or_fragment_char(b));
    offending.map_or(Ok(()), |b| Err(UriError::InvalidQueryCharacter(b)))
}
/// Validates a fragment component (the text after `#`, without the `#`).
///
/// # Errors
/// `UriError::InvalidPercentEncoding` for a malformed triplet anywhere in the
/// fragment, otherwise `UriError::InvalidFragmentCharacter` carrying the first
/// disallowed byte.
fn validate_fragment(fragment: &str) -> Result<(), UriError> {
    validate_percent_encoding(fragment)?;
    // Every `%` is now known to start a valid triplet whose hex digits are
    // allowed bytes, so each byte can be judged on its own.
    for b in fragment.bytes() {
        let allowed = b == b'%' || is_query_or_fragment_char(b);
        if !allowed {
            return Err(UriError::InvalidFragmentCharacter(b));
        }
    }
    Ok(())
}
/// Percent-encodes every byte of `input` that is not an unreserved character.
///
/// Encoded bytes are written as `%` followed by two uppercase hex digits.
pub fn percent_encode(input: &str) -> String {
    input
        .bytes()
        .fold(String::with_capacity(input.len() * 3), |mut encoded, octet| {
            if is_unreserved(octet) {
                encoded.push(char::from(octet));
            } else {
                let triplet = ['%', to_hex_char(octet >> 4), to_hex_char(octet & 0x0F)];
                encoded.extend(triplet.iter());
            }
            encoded
        })
}
/// Percent-encodes `input` for use as a path: unreserved bytes and `/` are
/// kept, every other byte becomes `%` plus two uppercase hex digits.
pub fn percent_encode_path(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len() * 3);
    for octet in input.bytes() {
        let keep = octet == b'/' || is_unreserved(octet);
        if keep {
            encoded.push(char::from(octet));
            continue;
        }
        encoded.push('%');
        encoded.push(to_hex_char(octet >> 4));
        encoded.push(to_hex_char(octet & 0x0F));
    }
    encoded
}
/// Percent-encodes `input` for use as a query: unreserved bytes, `=` and `&`
/// are kept, every other byte becomes `%` plus two uppercase hex digits.
pub fn percent_encode_query(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len() * 3);
    input.bytes().for_each(|octet| {
        if matches!(octet, b'=' | b'&') || is_unreserved(octet) {
            encoded.push(char::from(octet));
        } else {
            let (high, low) = (octet >> 4, octet & 0x0F);
            encoded.push('%');
            encoded.push(to_hex_char(high));
            encoded.push(to_hex_char(low));
        }
    });
    encoded
}
/// Maps a 4-bit value (`0..=15`) to its uppercase hexadecimal digit.
///
/// # Panics
/// Panics (via `unreachable!`) if `nibble` is greater than 15.
fn to_hex_char(nibble: u8) -> char {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    match UPPER_HEX.get(usize::from(nibble)) {
        Some(&digit) => char::from(digit),
        None => unreachable!(),
    }
}
/// Percent-decodes `input` and interprets the resulting bytes as UTF-8.
///
/// # Errors
/// Propagates any error from `percent_decode_bytes`, and returns
/// `UriError::InvalidUtf8` when the decoded bytes are not valid UTF-8.
pub fn percent_decode(input: &str) -> Result<String, UriError> {
    let decoded = percent_decode_bytes(input)?;
    match String::from_utf8(decoded) {
        Ok(text) => Ok(text),
        Err(_) => Err(UriError::InvalidUtf8),
    }
}
/// Percent-decodes `input` into raw bytes; bytes other than `%` are copied
/// through unchanged.
///
/// # Errors
/// Returns `UriError::InvalidPercentEncoding` when a `%` is not followed by
/// two hexadecimal digits.
pub fn percent_decode_bytes(input: &str) -> Result<Vec<u8>, UriError> {
    let raw = input.as_bytes();
    let mut decoded = Vec::with_capacity(raw.len());
    let mut idx = 0;
    while idx < raw.len() {
        let current = raw[idx];
        if current != b'%' {
            decoded.push(current);
            idx += 1;
            continue;
        }
        // `get` is `None` when the triplet is truncated by the end of input.
        let (high, low) = match raw.get(idx + 1..idx + 3) {
            Some(&[h, l]) => (from_hex_char(h), from_hex_char(l)),
            _ => return Err(UriError::InvalidPercentEncoding),
        };
        match (high, low) {
            (Some(h), Some(l)) => decoded.push((h << 4) | l),
            _ => return Err(UriError::InvalidPercentEncoding),
        }
        idx += 3;
    }
    Ok(decoded)
}
/// Returns the numeric value (`0..=15`) of an ASCII hexadecimal digit, in
/// either case, or `None` for any other byte.
fn from_hex_char(c: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly `0-9`, `a-f`, `A-F`; the result is < 16,
    // so narrowing back to `u8` cannot truncate.
    char::from(c).to_digit(16).map(|value| value as u8)
}
/// A parsed URI reference: the original text plus byte offsets into it that
/// delimit each component. The accessors on `Uri` slice `source` with these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Uri {
// The complete input string as given to `Uri::parse`.
source: String,
// Offset of the `:` that terminates the scheme; `None` when there is no scheme.
scheme_end: Option<usize>,
// Offset of the first authority byte (just after `//`); `None` without an authority.
authority_start: Option<usize>,
// Offset one past the last authority byte.
authority_end: Option<usize>,
// `authority_start` plus the host-end offset reported by `parse_authority`.
host_end: Option<usize>,
// Numeric port, when one was present and parsed.
port: Option<u16>,
// Path is `source[path_start..path_end]` (may be empty).
path_start: usize,
path_end: usize,
// Query is `source[query_start..query_end]`, excluding the leading `?`.
query_start: Option<usize>,
query_end: Option<usize>,
// Fragment runs from here (just after `#`) to the end of `source`.
fragment_start: Option<usize>,
}
impl Uri {
    /// Parses `input` as a URI reference, validating each component and
    /// recording where it lies inside the original text.
    ///
    /// # Errors
    /// * `UriError::Empty` for an empty string.
    /// * `UriError::InvalidScheme` when the scheme is malformed.
    /// * Whatever `parse_authority`, `validate_path`, `validate_query`, or
    ///   `validate_fragment` report, checked in that order.
    pub fn parse(input: &str) -> Result<Self, UriError> {
        /// Index of the first byte at or after `from` for which `is_stop`
        /// holds, or `raw.len()` when there is none.
        fn scan_to(raw: &[u8], from: usize, is_stop: impl Fn(u8) -> bool) -> usize {
            raw[from..]
                .iter()
                .position(|&b| is_stop(b))
                .map_or(raw.len(), |offset| from + offset)
        }

        if input.is_empty() {
            return Err(UriError::Empty);
        }
        let raw = input.as_bytes();
        let mut cursor = 0;

        // Scheme: everything before the first `:` when only scheme bytes precede it.
        let scheme_end = match find_scheme_end(raw) {
            Some(colon) => {
                if !raw[0].is_ascii_alphabetic() {
                    return Err(UriError::InvalidScheme);
                }
                let tail_ok = raw[1..colon]
                    .iter()
                    .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.'));
                if !tail_ok {
                    return Err(UriError::InvalidScheme);
                }
                cursor = colon + 1;
                Some(colon)
            }
            None => None,
        };

        // Authority: present only when the remainder starts with `//`.
        let (authority_start, authority_end, host_end, port) =
            if raw[cursor..].starts_with(b"//") {
                let begin = cursor + 2;
                let finish = scan_to(raw, begin, |b| matches!(b, b'/' | b'?' | b'#'));
                let (host_len, port) = parse_authority(&input[begin..finish])?;
                cursor = finish;
                (Some(begin), Some(finish), Some(begin + host_len), port)
            } else {
                (None, None, None, None)
            };

        // Path: runs up to the first `?` or `#`.
        let path_start = cursor;
        let path_end = scan_to(raw, path_start, |b| matches!(b, b'?' | b'#'));
        cursor = path_end;

        // Query: between `?` and the first `#`.
        let (query_start, query_end) = match raw.get(cursor) {
            Some(b'?') => {
                let begin = cursor + 1;
                let finish = scan_to(raw, begin, |b| b == b'#');
                cursor = finish;
                (Some(begin), Some(finish))
            }
            _ => (None, None),
        };

        // Fragment: everything after `#`.
        let fragment_start = match raw.get(cursor) {
            Some(b'#') => Some(cursor + 1),
            _ => None,
        };

        validate_path(&input[path_start..path_end])?;
        if let (Some(begin), Some(finish)) = (query_start, query_end) {
            validate_query(&input[begin..finish])?;
        }
        if let Some(begin) = fragment_start {
            validate_fragment(&input[begin..])?;
        }

        Ok(Uri {
            source: input.to_owned(),
            scheme_end,
            authority_start,
            authority_end,
            host_end,
            port,
            path_start,
            path_end,
            query_start,
            query_end,
            fragment_start,
        })
    }

    /// The scheme without its trailing `:`, if the URI has one.
    pub fn scheme(&self) -> Option<&str> {
        let end = self.scheme_end?;
        Some(&self.source[..end])
    }

    /// The full authority (`userinfo@host:port` as written), if present.
    pub fn authority(&self) -> Option<&str> {
        let (start, end) = (self.authority_start?, self.authority_end?);
        Some(&self.source[start..end])
    }

    /// The host portion of the authority, with any userinfo stripped.
    pub fn host(&self) -> Option<&str> {
        let (start, end) = (self.authority_start?, self.host_end?);
        let through_host = &self.source[start..end];
        // Anything up to and including the last `@` is userinfo.
        let host = match through_host.rfind('@') {
            Some(at) => &through_host[at + 1..],
            None => through_host,
        };
        Some(host)
    }

    /// The numeric port, if one was given.
    pub fn port(&self) -> Option<u16> {
        self.port
    }

    /// The path component (possibly empty).
    pub fn path(&self) -> &str {
        let (start, end) = (self.path_start, self.path_end);
        &self.source[start..end]
    }

    /// The query component without its leading `?`, if present.
    pub fn query(&self) -> Option<&str> {
        let (start, end) = (self.query_start?, self.query_end?);
        Some(&self.source[start..end])
    }

    /// The fragment component without its leading `#`, if present.
    pub fn fragment(&self) -> Option<&str> {
        let start = self.fragment_start?;
        Some(&self.source[start..])
    }

    /// The original string this URI was parsed from.
    pub fn as_str(&self) -> &str {
        self.source.as_str()
    }

    /// The path (or `/` when the path is empty) followed by `?query` when a
    /// query is present.
    pub fn origin_form(&self) -> String {
        let path = match self.path() {
            "" => "/",
            other => other,
        };
        match self.query() {
            Some(query) => format!("{}?{}", path, query),
            None => path.to_string(),
        }
    }

    /// `true` when the URI has a scheme.
    pub fn is_absolute(&self) -> bool {
        self.scheme_end.is_some()
    }

    /// `true` when the URI has no scheme.
    pub fn is_relative(&self) -> bool {
        !self.is_absolute()
    }
}
/// Displays the URI exactly as it was originally written.
impl fmt::Display for Uri {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.source)
    }
}
/// Locates the `:` that ends a leading scheme.
///
/// Returns the index of the first `:` when it is not at index 0 and every byte
/// before it is an ASCII alphanumeric, `+`, `-`, or `.`; otherwise `None`.
/// The "first byte must be a letter" rule is not checked here.
fn find_scheme_end(bytes: &[u8]) -> Option<usize> {
    // The first byte that cannot belong to a scheme decides the outcome: it
    // must be a `:` that is not the very first byte.
    let stop = bytes
        .iter()
        .position(|&b| !(b.is_ascii_alphanumeric() || matches!(b, b'+' | b'-' | b'.')))?;
    if stop > 0 && bytes[stop] == b':' {
        Some(stop)
    } else {
        None
    }
}
/// Validates a host: empty, a bracketed IP literal, an IPv4 address, or a
/// registered name.
///
/// # Errors
/// Returns `UriError::InvalidHost` when none of those forms match.
fn validate_host(host: &str) -> Result<(), UriError> {
    if host.is_empty() {
        return Ok(());
    }
    if host.starts_with('[') {
        // The first `]` must also be the final byte of the host.
        return match host.find(']') {
            Some(close) if close + 1 == host.len() => validate_ip_literal(&host[1..close]),
            _ => Err(UriError::InvalidHost),
        };
    }
    match host.parse::<std::net::Ipv4Addr>() {
        Ok(_) => Ok(()),
        // Not dotted-quad IPv4: fall back to the registered-name rules.
        Err(_) => validate_reg_name(host),
    }
}
/// Validates a registered name: unreserved bytes, sub-delimiters, and
/// well-formed percent-triplets only.
///
/// # Errors
/// Returns `UriError::InvalidHost` on any other byte or a malformed triplet.
fn validate_reg_name(name: &str) -> Result<(), UriError> {
    let raw = name.as_bytes();
    let mut idx = 0;
    while idx < raw.len() {
        // Each arm yields how many bytes were consumed.
        idx += match raw[idx] {
            b'%' => match raw.get(idx + 1..idx + 3) {
                Some(&[hi, lo]) if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit() => 3,
                _ => return Err(UriError::InvalidHost),
            },
            b if is_unreserved(b) || is_sub_delim(b) => 1,
            _ => return Err(UriError::InvalidHost),
        };
    }
    Ok(())
}
/// Validates the text found between `[` and `]` in a host.
///
/// A literal starting with `v`/`V` is checked by `validate_ipv_future`;
/// anything else must parse as a `std::net::Ipv6Addr`.
///
/// # Errors
/// Returns `UriError::InvalidHost` for an empty or unparseable literal.
fn validate_ip_literal(literal: &str) -> Result<(), UriError> {
    match literal.bytes().next() {
        None => Err(UriError::InvalidHost),
        Some(b'v') | Some(b'V') => validate_ipv_future(literal),
        Some(_) => literal
            .parse::<std::net::Ipv6Addr>()
            .map(|_| ())
            .map_err(|_| UriError::InvalidHost),
    }
}
/// Validates an `IPvFuture`-style literal: one leading byte (the caller has
/// already checked it is `v`/`V`), one or more hex digits, a `.`, then one or
/// more bytes that are unreserved, sub-delimiters, or `:`.
///
/// # Errors
/// Returns `UriError::InvalidHost` when the literal does not have that shape.
fn validate_ipv_future(literal: &str) -> Result<(), UriError> {
    let raw = literal.as_bytes();
    let dot = raw
        .iter()
        .position(|&b| b == b'.')
        .ok_or(UriError::InvalidHost)?;

    // Version digits sit between the leading byte and the first `.`;
    // `get` is `None` when the dot is the very first byte.
    let version_ok = match raw.get(1..dot) {
        Some(digits) => !digits.is_empty() && digits.iter().all(u8::is_ascii_hexdigit),
        None => false,
    };

    let address = &raw[dot + 1..];
    let address_ok = !address.is_empty()
        && address
            .iter()
            .all(|&b| b == b':' || is_unreserved(b) || is_sub_delim(b));

    if version_ok && address_ok {
        Ok(())
    } else {
        Err(UriError::InvalidHost)
    }
}
/// Validates a userinfo component: unreserved bytes, sub-delimiters, `:`, and
/// well-formed percent-triplets only.
///
/// # Errors
/// Returns `UriError::InvalidUserinfo` on any other byte or a malformed triplet.
fn validate_userinfo(userinfo: &str) -> Result<(), UriError> {
    let mut pending = userinfo.bytes();
    while let Some(byte) = pending.next() {
        let acceptable = if byte == b'%' {
            // A `%` must be followed by exactly two hex digits.
            let (hi, lo) = (pending.next(), pending.next());
            matches!(
                (hi, lo),
                (Some(h), Some(l)) if h.is_ascii_hexdigit() && l.is_ascii_hexdigit()
            )
        } else {
            byte == b':' || is_unreserved(byte) || is_sub_delim(byte)
        };
        if !acceptable {
            return Err(UriError::InvalidUserinfo);
        }
    }
    Ok(())
}
/// Splits an authority (`[userinfo@]host[:port]`) into its parts and
/// validates each one.
///
/// Returns `(host_end, port)`. `host_end` is the byte offset within
/// `authority` just past the host, i.e. where the `:port` suffix begins when
/// there is one. `port` is `None` when the port is absent or empty.
///
/// # Errors
/// * `UriError::InvalidUserinfo` when the userinfo is malformed.
/// * `UriError::InvalidHost` when the host is malformed, or an IP literal is
///   followed by something other than `:port`.
/// * `UriError::InvalidPort` when the port contains a non-digit or does not
///   fit in a `u16`.
fn parse_authority(authority: &str) -> Result<(usize, Option<u16>), UriError> {
    /// Parses a port written as zero or more ASCII digits; empty means "no port".
    fn parse_port(port: &str) -> Result<Option<u16>, UriError> {
        if port.is_empty() {
            return Ok(None);
        }
        // `str::parse::<u16>` alone would also accept a leading `+`, which is
        // not a digit, so the digit check has to come first.
        if !port.bytes().all(|b| b.is_ascii_digit()) {
            return Err(UriError::InvalidPort);
        }
        port.parse::<u16>()
            .map(Some)
            .map_err(|_| UriError::InvalidPort)
    }

    if authority.is_empty() {
        return Ok((0, None));
    }

    // Everything before the last `@` is userinfo; the host starts right after it.
    let host_offset = match authority.rfind('@') {
        Some(at) => {
            validate_userinfo(&authority[..at])?;
            at + 1
        }
        None => 0,
    };
    let host_part = &authority[host_offset..];

    // Bracketed IP literal: the colons inside the brackets are not port separators.
    if host_part.starts_with('[') {
        let close = host_part.find(']').ok_or(UriError::InvalidHost)?;
        validate_ip_literal(&host_part[1..close])?;
        let host_end = host_offset + close + 1;
        let after_bracket = &host_part[close + 1..];
        if after_bracket.is_empty() {
            return Ok((host_end, None));
        }
        let port_str = after_bracket
            .strip_prefix(':')
            .ok_or(UriError::InvalidHost)?;
        return Ok((host_end, parse_port(port_str)?));
    }

    match host_part.rfind(':') {
        Some(colon) => {
            validate_host(&host_part[..colon])?;
            let port = parse_port(&host_part[colon + 1..])?;
            // The host ends at the colon even when the port is empty, so the
            // separator never leaks into the host slice.
            Ok((host_offset + colon, port))
        }
        None => {
            validate_host(host_part)?;
            Ok((authority.len(), None))
        }
    }
}
/// Resolves `reference` against `base` and re-parses the result.
///
/// The cases are tried in this order:
/// 1. the reference has a scheme: use it as-is, with dot-segments removed;
/// 2. the reference has an authority: take only the scheme from `base`;
/// 3. the reference path is empty: keep the base path, preferring the
///    reference's query over the base's;
/// 4. the reference path starts with `/`: replace the base path;
/// 5. otherwise: merge the reference path onto the base path.
///
/// The fragment always comes from `reference`.
///
/// # Errors
/// Returns whatever `Uri::parse` reports for the recombined string.
pub fn resolve(base: &Uri, reference: &Uri) -> Result<Uri, UriError> {
    let ref_path = reference.path();

    let (scheme, authority, path, query) = if reference.is_absolute() {
        (
            reference.scheme(),
            reference.authority(),
            remove_dot_segments(ref_path),
            reference.query(),
        )
    } else if reference.authority().is_some() {
        (
            base.scheme(),
            reference.authority(),
            remove_dot_segments(ref_path),
            reference.query(),
        )
    } else if ref_path.is_empty() {
        // The base path is taken verbatim here (no dot-segment removal).
        (
            base.scheme(),
            base.authority(),
            base.path().to_string(),
            reference.query().or(base.query()),
        )
    } else {
        let target_path = if ref_path.starts_with('/') {
            remove_dot_segments(ref_path)
        } else {
            remove_dot_segments(&merge_paths(base, ref_path))
        };
        (
            base.scheme(),
            base.authority(),
            target_path,
            reference.query(),
        )
    };

    let recombined = build_uri(scheme, authority, &path, query, reference.fragment());
    Uri::parse(&recombined)
}
/// Merges a relative `reference_path` onto the path of `base`.
///
/// * Base has an authority and an empty path: the result is `/` + reference.
/// * Otherwise the reference replaces everything after the last `/` of the
///   base path, or the whole base path when it contains no `/`.
fn merge_paths(base: &Uri, reference_path: &str) -> String {
    let base_path = base.path();
    if base_path.is_empty() && base.authority().is_some() {
        return format!("/{}", reference_path);
    }
    match base_path.rfind('/') {
        Some(slash) => {
            // Keep the base up to and including its final slash.
            let mut merged = String::from(&base_path[..=slash]);
            merged.push_str(reference_path);
            merged
        }
        None => reference_path.to_owned(),
    }
}
/// Removes `.` and `..` segments from `path`.
///
/// The path is consumed from the front. Each step either drops a dot-segment
/// prefix (removing the last kept segment for `..`) or moves the next segment,
/// including its leading `/`, to the output.
fn remove_dot_segments(path: &str) -> String {
    let mut kept: Vec<&str> = Vec::new();
    let mut rest = path;

    while !rest.is_empty() {
        // Leading "../" or "./": drop the prefix.
        if let Some(tail) = rest
            .strip_prefix("../")
            .or_else(|| rest.strip_prefix("./"))
        {
            rest = tail;
            continue;
        }
        // "/./" collapses to "/"; a trailing "/." becomes "/".
        if rest.starts_with("/./") {
            rest = &rest[2..];
            continue;
        }
        if rest == "/." {
            kept.push("/");
            break;
        }
        // "/../" collapses to "/" and removes the last kept segment; a
        // trailing "/.." does the same and leaves a final "/".
        if rest.starts_with("/../") {
            kept.pop();
            rest = &rest[3..];
            continue;
        }
        if rest == "/.." {
            kept.pop();
            kept.push("/");
            break;
        }
        // A lone "." or ".." is discarded.
        if rest == "." || rest == ".." {
            break;
        }
        // Otherwise move one segment (optional leading '/' plus everything up
        // to, but not including, the next '/') to the output.
        let lead = if rest.starts_with('/') { 1 } else { 0 };
        let cut = rest[lead..]
            .find('/')
            .map_or(rest.len(), |offset| offset + lead);
        let (segment, tail) = rest.split_at(cut);
        kept.push(segment);
        rest = tail;
    }

    kept.concat()
}
/// Concatenates URI components into a string.
///
/// Present components are written in the order scheme (followed by `:`),
/// authority (preceded by `//`), path, query (preceded by `?`), and fragment
/// (preceded by `#`). Absent components contribute nothing.
fn build_uri(
    scheme: Option<&str>,
    authority: Option<&str>,
    path: &str,
    query: Option<&str>,
    fragment: Option<&str>,
) -> String {
    // (text written before, component, text written after)
    let pieces: [(&str, Option<&str>, &str); 5] = [
        ("", scheme, ":"),
        ("//", authority, ""),
        ("", Some(path), ""),
        ("?", query, ""),
        ("#", fragment, ""),
    ];

    let mut assembled = String::new();
    for (lead, component, trail) in pieces.iter() {
        if let Some(text) = component {
            assembled.push_str(lead);
            assembled.push_str(text);
            assembled.push_str(trail);
        }
    }
    assembled
}
/// Produces a normalized copy of `uri`.
///
/// * The scheme is lowercased.
/// * The authority is passed through `normalize_authority`.
/// * In the path, query, and fragment, percent-triplets are normalized:
///   escapes of unreserved characters are decoded and remaining escapes use
///   uppercase hex digits.
/// * Dot-segments are removed from the path.
///
/// # Errors
/// Returns any error from percent-encoding normalization or from re-parsing
/// the rebuilt string.
pub fn normalize(uri: &Uri) -> Result<Uri, UriError> {
    let scheme = uri.scheme().map(str::to_ascii_lowercase);
    let authority = uri.authority().map(normalize_authority);

    // Decode unreserved escapes BEFORE removing dot-segments. `.` is
    // unreserved, so `%2E%2E` is equivalent to `..`; doing it the other way
    // round would leave a literal `..` in the output and make `normalize`
    // non-idempotent.
    let path = normalize_percent_encoding(uri.path())?;
    let path = remove_dot_segments(&path);

    let query = uri.query().map(normalize_percent_encoding).transpose()?;
    let fragment = uri.fragment().map(normalize_percent_encoding).transpose()?;

    Uri::parse(&build_uri(
        scheme.as_deref(),
        authority.as_deref(),
        &path,
        query.as_deref(),
        fragment.as_deref(),
    ))
}
/// Normalizes an authority: the userinfo (everything before the last `@`) is
/// kept verbatim, and the host/port part goes through `normalize_host_port`.
fn normalize_authority(authority: &str) -> String {
    match authority.rfind('@') {
        Some(at) => {
            let (userinfo, at_and_host) = authority.split_at(at);
            // `at_and_host` still starts with the '@' separator; skip it.
            let host_port = normalize_host_port(&at_and_host[1..]);
            format!("{}@{}", userinfo, host_port)
        }
        None => normalize_host_port(authority),
    }
}
/// Normalizes the `host[:port]` part of an authority.
///
/// The host is lowercased, except that the two hex digits of every
/// percent-triplet are uppercased. Any `:port` suffix is kept unchanged.
/// Bracketed IP literals are lowercased as a whole.
fn normalize_host_port(host_port: &str) -> String {
    /// Lowercases a registered name while forcing the hex digits that follow
    /// each `%` to uppercase.
    fn normalize_reg_name_case(host: &str) -> String {
        let mut out = String::with_capacity(host.len());
        // Number of upcoming characters that belong to a percent-triplet.
        let mut hex_pending = 0u8;
        for ch in host.chars() {
            if hex_pending > 0 {
                out.push(ch.to_ascii_uppercase());
                hex_pending -= 1;
            } else {
                if ch == '%' {
                    hex_pending = 2;
                }
                out.push(ch.to_ascii_lowercase());
            }
        }
        out
    }

    if let Some(close) = host_port.strip_prefix('[').and_then(|s| s.find(']')) {
        // `close` is relative to the text after '['; +1 re-adds the bracket
        // and +1 more puts the split just past ']'.
        let (literal, after) = host_port.split_at(close + 2);
        return format!("{}{}", literal.to_ascii_lowercase(), after);
    }

    match host_port.rfind(':') {
        Some(colon) => {
            let (host, port) = host_port.split_at(colon);
            format!("{}{}", normalize_reg_name_case(host), port)
        }
        None => normalize_reg_name_case(host_port),
    }
}
/// Normalizes the percent-triplets in `input`.
///
/// Escapes of unreserved characters are replaced by the character itself, and
/// all other escapes are re-emitted with uppercase hex digits. Characters
/// outside a triplet, including non-ASCII ones, are copied through unchanged.
///
/// # Errors
/// Returns `UriError::InvalidPercentEncoding` when a `%` is not followed by
/// two hexadecimal digits.
fn normalize_percent_encoding(input: &str) -> Result<String, UriError> {
    /// Numeric value of one hex digit of a triplet; a missing or non-ASCII
    /// character is never a valid digit.
    fn hex_digit(ch: Option<char>) -> Result<u8, UriError> {
        ch.filter(char::is_ascii)
            .and_then(|c| from_hex_char(c as u8))
            .ok_or(UriError::InvalidPercentEncoding)
    }

    let mut result = String::with_capacity(input.len());
    // Walk characters rather than bytes: pushing each byte of a multi-byte
    // UTF-8 sequence `as char` would turn it into unrelated Latin-1 code
    // points and corrupt the text.
    let mut chars = input.chars();
    while let Some(ch) = chars.next() {
        if ch != '%' {
            result.push(ch);
            continue;
        }
        let high_val = hex_digit(chars.next())?;
        let low_val = hex_digit(chars.next())?;
        let decoded = (high_val << 4) | low_val;
        if is_unreserved(decoded) {
            // Unreserved bytes are ASCII, so this conversion is lossless.
            result.push(char::from(decoded));
        } else {
            result.push('%');
            result.push(to_hex_char(decoded >> 4));
            result.push(to_hex_char(decoded & 0x0F));
        }
    }
    Ok(result)
}
// Unit tests for percent-coding, parsing, resolution, dot-segment removal,
// and authority normalization.
#[cfg(test)]
mod tests {
use super::*;
// Unreserved text passes through; space, `=`, and non-ASCII bytes are escaped.
#[test]
fn test_percent_encode() {
assert_eq!(percent_encode("hello"), "hello");
assert_eq!(percent_encode("hello world"), "hello%20world");
assert_eq!(percent_encode("foo=bar"), "foo%3Dbar");
assert_eq!(percent_encode("日本語"), "%E6%97%A5%E6%9C%AC%E8%AA%9E");
}
// Decoding reverses the escapes, including multi-byte UTF-8 sequences.
#[test]
fn test_percent_decode() {
assert_eq!(percent_decode("hello").unwrap(), "hello");
assert_eq!(percent_decode("hello%20world").unwrap(), "hello world");
assert_eq!(
percent_decode("%E6%97%A5%E6%9C%AC%E8%AA%9E").unwrap(),
"日本語"
);
}
// Truncated triplets and non-hex digits are rejected.
#[test]
fn test_percent_decode_invalid() {
assert!(percent_decode("%").is_err());
assert!(percent_decode("%2").is_err());
assert!(percent_decode("%GG").is_err());
}
// A URI with every component present exposes each one through its accessor.
#[test]
fn test_uri_parse_full() {
let uri =
Uri::parse("https://user:pass@example.com:8080/path/to/resource?query=value#fragment")
.unwrap();
assert_eq!(uri.scheme(), Some("https"));
assert_eq!(uri.authority(), Some("user:pass@example.com:8080"));
assert_eq!(uri.host(), Some("example.com"));
assert_eq!(uri.port(), Some(8080));
assert_eq!(uri.path(), "/path/to/resource");
assert_eq!(uri.query(), Some("query=value"));
assert_eq!(uri.fragment(), Some("fragment"));
}
// Scheme plus host only: empty path, no port, query, or fragment.
#[test]
fn test_uri_parse_simple() {
let uri = Uri::parse("http://example.com").unwrap();
assert_eq!(uri.scheme(), Some("http"));
assert_eq!(uri.host(), Some("example.com"));
assert_eq!(uri.port(), None);
assert_eq!(uri.path(), "");
assert_eq!(uri.query(), None);
assert_eq!(uri.fragment(), None);
}
// An absolute-path reference has neither scheme nor host.
#[test]
fn test_uri_parse_path_only() {
let uri = Uri::parse("/path/to/resource").unwrap();
assert_eq!(uri.scheme(), None);
assert_eq!(uri.host(), None);
assert_eq!(uri.path(), "/path/to/resource");
}
// A relative-path reference keeps its dot-segments when merely parsed.
#[test]
fn test_uri_parse_relative() {
let uri = Uri::parse("../other/path").unwrap();
assert_eq!(uri.scheme(), None);
assert!(uri.is_relative());
assert_eq!(uri.path(), "../other/path");
}
// The host of a bracketed IPv6 literal includes the brackets.
#[test]
fn test_uri_parse_ipv6() {
let uri = Uri::parse("http://[::1]:8080/path").unwrap();
assert_eq!(uri.host(), Some("[::1]"));
assert_eq!(uri.port(), Some(8080));
}
// origin_form is path plus query, with an empty path rendered as "/".
#[test]
fn test_origin_form() {
let uri = Uri::parse("http://example.com/path?query").unwrap();
assert_eq!(uri.origin_form(), "/path?query");
let uri = Uri::parse("http://example.com").unwrap();
assert_eq!(uri.origin_form(), "/");
}
// Resolution of "..", absolute-path, and plain relative references.
#[test]
fn test_resolve() {
let base = Uri::parse("http://example.com/a/b/c").unwrap();
let resolved = resolve(&base, &Uri::parse("../d").unwrap()).unwrap();
assert_eq!(resolved.path(), "/a/d");
let resolved = resolve(&base, &Uri::parse("/absolute").unwrap()).unwrap();
assert_eq!(resolved.path(), "/absolute");
let resolved = resolve(&base, &Uri::parse("relative").unwrap()).unwrap();
assert_eq!(resolved.path(), "/a/b/relative");
}
// Dot-segment removal on absolute and relative paths.
#[test]
fn test_remove_dot_segments() {
assert_eq!(remove_dot_segments("/a/b/c/./../../g"), "/a/g");
assert_eq!(remove_dot_segments("mid/content=5/../6"), "mid/6");
assert_eq!(remove_dot_segments("/../a"), "/a");
assert_eq!(remove_dot_segments("./a"), "a");
}
// Normalization lowercases the host but leaves the userinfo's case alone.
#[test]
fn test_normalize_authority_userinfo_preserved() {
let uri = Uri::parse("http://UserName:PassWord@EXAMPLE.COM/path").unwrap();
let normalized = normalize(&uri).unwrap();
assert_eq!(
normalized.authority(),
Some("UserName:PassWord@example.com")
);
}
// Without userinfo, the host is lowercased and the port kept.
#[test]
fn test_normalize_authority_without_userinfo() {
let uri = Uri::parse("http://EXAMPLE.COM:8080/path").unwrap();
let normalized = normalize(&uri).unwrap();
assert_eq!(normalized.authority(), Some("example.com:8080"));
}
}