use core::fmt;
use core::str::FromStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "zeroize")]
use zeroize::Zeroize;
/// Reasons [`Url::new`] can reject an input string.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[non_exhaustive]
pub enum UrlError {
/// The input string was empty.
Empty,
/// The input was longer than 2048 bytes; the payload is the input's length
/// as reported by `str::len`.
TooLong(usize),
/// The input started with `:`, i.e. the scheme before the separator was empty.
MissingScheme,
/// The scheme did not start with an ASCII letter, or contained a character
/// other than ASCII alphanumerics, `+`, `.` and `-`.
InvalidScheme,
/// The input contained no `:` at all.
MissingSchemeSeparator,
/// The scheme was followed by `//` but the authority after it was empty.
MissingAuthority,
/// The host part of the authority was invalid (for example, empty).
InvalidHost,
/// A port was present but was empty or contained a non-digit character.
InvalidPort,
/// The input contained a character outside the accepted URL character set;
/// the payload is the first such character.
InvalidChar(char),
}
impl fmt::Display for UrlError {
    /// Renders a short, human-readable description of the validation failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants carrying data need real formatting and return early; every
        // other variant maps to a fixed message written in one place below.
        let fixed_text = match *self {
            Self::TooLong(len) => {
                return write!(
                    f,
                    "URL exceeds maximum length of 2048 characters (got {len})"
                );
            }
            Self::InvalidChar(c) => {
                return write!(f, "URL contains invalid character '{c}'");
            }
            Self::Empty => "URL cannot be empty",
            Self::MissingScheme => "URL must contain a scheme",
            Self::InvalidScheme => "URL scheme is invalid",
            Self::MissingSchemeSeparator => "URL scheme must be followed by :",
            Self::MissingAuthority => "URL has // but no authority",
            Self::InvalidHost => "URL host is invalid",
            Self::InvalidPort => "URL port is invalid",
        };
        f.write_str(fixed_text)
    }
}
// `UrlError` needs no custom `source()`; the empty impl only opts it into the
// standard error trait. It is gated on the `std` feature because the trait is
// named through `std::error::Error`.
#[cfg(feature = "std")]
impl std::error::Error for UrlError {}
/// A URL held inline in a fixed-capacity `heapless::String<2048>`.
///
/// Values built through [`Url::new`] (or the `TryFrom<&str>` / `FromStr`
/// impls, which delegate to it) have been validated and have a lowercased
/// scheme. Equality, ordering and hashing are the derived ones over the
/// wrapped string.
// NOTE(review): the derived `Deserialize` constructs the wrapper directly from
// the inner string, so it appears to bypass the checks in `Url::new` — confirm
// whether deserialized values need re-validation.
#[repr(transparent)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "zeroize", derive(Zeroize))]
pub struct Url(heapless::String<2048>);
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for Url {
    /// Builds a structurally valid `scheme://host[:port]` URL from fuzzer input.
    ///
    /// The scheme is one of `http`, `https`, `ftp` or `file`; the host has one
    /// to three dot-separated labels of 1..=20 characters drawn from `a-z` and
    /// `0-9`; the optional port is in `1..=65535`.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading `u`, and returns
    /// `arbitrary::Error::IncorrectFormat` if a fixed-capacity buffer
    /// overflows (not expected: the longest output is far below 2048 bytes).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        // Lets `write!` target the heapless buffer directly, so the port is
        // formatted without a temporary heap-allocated `String`.
        use core::fmt::Write as _;

        const SCHEMES: &[&str] = &["http", "https", "ftp", "file"];
        const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz";
        const DIGITS: &[u8] = b"0123456789";

        let scheme = SCHEMES[usize::from(u8::arbitrary(u)?) % SCHEMES.len()];

        let label_count = 1 + (u8::arbitrary(u)? % 3);
        let mut host = heapless::String::<253>::new();
        for label_idx in 0..label_count {
            let label_len = 1 + (u8::arbitrary(u)? % 20);
            for _ in 0..label_len {
                let byte = u8::arbitrary(u)?;
                // The low bit picks the character class and the remaining bits
                // pick the character. Using the same bits for both (as
                // `byte % 2` plus `byte % 26` / `byte % 10` would) ties the
                // index parity to the class and makes half of each table
                // unreachable.
                let pick = usize::from(byte >> 1);
                let table = if byte & 1 == 0 { ALPHABET } else { DIGITS };
                host.push(char::from(table[pick % table.len()]))
                    .map_err(|_| arbitrary::Error::IncorrectFormat)?;
            }
            if label_idx + 1 < label_count {
                host.push('.')
                    .map_err(|_| arbitrary::Error::IncorrectFormat)?;
            }
        }

        let has_port = bool::arbitrary(u)?;

        let mut url = heapless::String::<2048>::new();
        url.push_str(scheme)
            .map_err(|_| arbitrary::Error::IncorrectFormat)?;
        url.push_str("://")
            .map_err(|_| arbitrary::Error::IncorrectFormat)?;
        url.push_str(&host)
            .map_err(|_| arbitrary::Error::IncorrectFormat)?;
        if has_port {
            // `% 65535` yields 0..=65534, so adding one cannot overflow `u16`.
            let port = 1 + (u16::arbitrary(u)? % 65535);
            write!(url, ":{port}").map_err(|_| arbitrary::Error::IncorrectFormat)?;
        }
        Ok(Self(url))
    }
}
impl Url {
    /// Parses and validates `s`, returning a [`Url`] with a lowercased scheme.
    ///
    /// The accepted shape is `scheme ":" rest`. When `rest` starts with `//`,
    /// the text up to the first `/`, `?` or `#` is treated as the authority
    /// `[userinfo "@"] host [":" port]`. A host starting with `[` is treated
    /// as a bracketed literal (for example `[::1]`), so colons inside the
    /// brackets are not mistaken for the port separator. The text inside the
    /// brackets and the numeric range of the port are not validated.
    ///
    /// # Errors
    ///
    /// * [`UrlError::Empty`] if `s` is empty.
    /// * [`UrlError::TooLong`] if `s` is longer than 2048 bytes.
    /// * [`UrlError::MissingSchemeSeparator`] if `s` contains no `:`.
    /// * [`UrlError::MissingScheme`] if `s` starts with `:`.
    /// * [`UrlError::InvalidScheme`] if the scheme does not start with an
    ///   ASCII letter or contains anything but ASCII alphanumerics, `+`, `.`
    ///   and `-`.
    /// * [`UrlError::MissingAuthority`] if `//` is followed by an empty
    ///   authority.
    /// * [`UrlError::InvalidHost`] if the host is empty or is a bracketed
    ///   literal that is empty or not properly closed.
    /// * [`UrlError::InvalidPort`] if a port separator is present but the port
    ///   is empty or not made solely of ASCII digits.
    /// * [`UrlError::InvalidChar`] if any character is outside the accepted
    ///   URL character set.
    pub fn new(s: &str) -> Result<Self, UrlError> {
        if s.is_empty() {
            return Err(UrlError::Empty);
        }
        if s.len() > 2048 {
            return Err(UrlError::TooLong(s.len()));
        }

        let Some((scheme, rest)) = s.split_once(':') else {
            return Err(UrlError::MissingSchemeSeparator);
        };
        if scheme.is_empty() {
            return Err(UrlError::MissingScheme);
        }
        let mut scheme_chars = scheme.chars();
        let starts_with_letter = scheme_chars
            .next()
            .is_some_and(|c| c.is_ascii_alphabetic());
        let tail_is_valid =
            scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '.' | '-'));
        if !(starts_with_letter && tail_is_valid) {
            return Err(UrlError::InvalidScheme);
        }

        if let Some(after_slashes) = rest.strip_prefix("//") {
            let authority = Self::leading_authority(after_slashes);
            if authority.is_empty() {
                return Err(UrlError::MissingAuthority);
            }
            let (host, port) = Self::split_host_port(authority);
            if host.is_empty() {
                return Err(UrlError::InvalidHost);
            }
            // A bracketed literal must be non-empty and its first `]` must be
            // the final character of the host.
            if host.starts_with('[')
                && !(host.len() > 2 && host.find(']') == Some(host.len() - 1))
            {
                return Err(UrlError::InvalidHost);
            }
            if port.is_some_and(|p| p.is_empty() || !p.bytes().all(|b| b.is_ascii_digit())) {
                return Err(UrlError::InvalidPort);
            }
        }

        if let Some(bad) = s.chars().find(|&c| !Self::is_valid_url_char(c)) {
            return Err(UrlError::InvalidChar(bad));
        }

        // The length check above guarantees these pushes fit; the error
        // mapping only exists to avoid a panic path.
        let mut inner = heapless::String::<2048>::new();
        for c in scheme.chars() {
            inner
                .push(c.to_ascii_lowercase())
                .map_err(|_| UrlError::TooLong(s.len()))?;
        }
        inner.push(':').map_err(|_| UrlError::TooLong(s.len()))?;
        inner
            .push_str(rest)
            .map_err(|_| UrlError::TooLong(s.len()))?;
        Ok(Self(inner))
    }

    /// Returns the authority at the start of `after_slashes`: everything up to
    /// (not including) the first `/`, `?` or `#`.
    fn leading_authority(after_slashes: &str) -> &str {
        let end = after_slashes
            .find(['/', '?', '#'])
            .unwrap_or(after_slashes.len());
        &after_slashes[..end]
    }

    /// Splits an authority into `(host, port)`, skipping any `userinfo@` prefix.
    ///
    /// The port is `Some` whenever a port separator is present, even if the
    /// text after it is empty. This performs no validation.
    fn split_host_port(authority: &str) -> (&str, Option<&str>) {
        // Userinfo may itself contain `:`, so it has to be removed before
        // looking for the port separator.
        let host_port = authority
            .rsplit_once('@')
            .map_or(authority, |(_, tail)| tail);

        if host_port.starts_with('[') {
            // Bracketed literal: colons up to the closing `]` belong to the host.
            if let Some(close) = host_port.find(']') {
                let (host, tail) = host_port.split_at(close + 1);
                if tail.is_empty() {
                    return (host, None);
                }
                if let Some(port) = tail.strip_prefix(':') {
                    return (host, Some(port));
                }
            }
            // Unclosed bracket or junk after `]`: report everything as the
            // host so that validation can reject it.
            return (host_port, None);
        }

        match host_port.split_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (host_port, None),
        }
    }

    /// Returns the authority component of this URL, if it has one.
    fn authority(&self) -> Option<&str> {
        let (_, rest) = self.as_str().split_once(':')?;
        rest.strip_prefix("//").map(Self::leading_authority)
    }

    /// Returns `true` for ASCII alphanumerics and the punctuation accepted
    /// anywhere in a URL (delimiters, sub-delimiters, `-._~` and `%`).
    const fn is_valid_url_char(c: char) -> bool {
        c.is_ascii_alphanumeric()
            || matches!(
                c,
                ':' | '/'
                    | '?'
                    | '#'
                    | '['
                    | ']'
                    | '@'
                    | '!'
                    | '$'
                    | '&'
                    | '\''
                    | '('
                    | ')'
                    | '*'
                    | '+'
                    | ','
                    | ';'
                    | '='
                    | '-'
                    | '.'
                    | '_'
                    | '~'
                    | '%'
            )
    }

    /// Returns the URL as a string slice.
    #[must_use]
    #[inline]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Returns a reference to the underlying fixed-capacity string.
    #[must_use]
    #[inline]
    pub const fn as_inner(&self) -> &heapless::String<2048> {
        &self.0
    }

    /// Consumes the URL and returns the underlying fixed-capacity string.
    #[must_use]
    #[inline]
    pub fn into_inner(self) -> heapless::String<2048> {
        self.0
    }

    /// Returns the scheme: the text before the first `:`.
    ///
    /// If the stored string contains no `:`, the whole string is returned.
    #[must_use]
    pub fn scheme(&self) -> &str {
        let s = self.as_str();
        s.split_once(':').map_or(s, |(scheme, _)| scheme)
    }

    /// Returns the host, or `None` if the URL has no `//` authority.
    ///
    /// Any `userinfo@` prefix is skipped and a bracketed literal is returned
    /// with its brackets.
    #[must_use]
    pub fn host(&self) -> Option<&str> {
        self.authority()
            .map(|authority| Self::split_host_port(authority).0)
    }

    /// Returns the port text, or `None` if the URL has no authority or the
    /// authority carries no port.
    #[must_use]
    pub fn port(&self) -> Option<&str> {
        self.authority()
            .and_then(|authority| Self::split_host_port(authority).1)
    }

    /// Returns the path: the text after the authority (or after the scheme
    /// separator when there is no authority) up to the first `?` or `#`.
    ///
    /// Returns `None` when that text is empty.
    #[must_use]
    pub fn path(&self) -> Option<&str> {
        let (_, rest) = self.as_str().split_once(':')?;
        let after_authority = match rest.strip_prefix("//") {
            Some(after_slashes) => &after_slashes[Self::leading_authority(after_slashes).len()..],
            None => rest,
        };
        let end = after_authority
            .find(['?', '#'])
            .unwrap_or(after_authority.len());
        let path = &after_authority[..end];
        (!path.is_empty()).then_some(path)
    }

    /// Returns the query: the text between the first `?` and the fragment.
    ///
    /// Returns `None` when there is no `?` before the fragment or the query is
    /// empty.
    #[must_use]
    pub fn query(&self) -> Option<&str> {
        let s = self.as_str();
        // Cut the fragment off first: a `?` after `#` belongs to the fragment
        // and must not be reported as a query.
        let before_fragment = s.split_once('#').map_or(s, |(head, _)| head);
        let (_, query) = before_fragment.split_once('?')?;
        (!query.is_empty()).then_some(query)
    }

    /// Returns the fragment: the text after the first `#`.
    ///
    /// Returns `None` when there is no `#` or the fragment is empty.
    #[must_use]
    pub fn fragment(&self) -> Option<&str> {
        let (_, fragment) = self.as_str().split_once('#')?;
        (!fragment.is_empty()).then_some(fragment)
    }
}
impl TryFrom<&str> for Url {
type Error = UrlError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
Self::new(s)
}
}
impl From<Url> for heapless::String<2048> {
fn from(url: Url) -> Self {
url.0
}
}
impl FromStr for Url {
type Err = UrlError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::new(s)
}
}
impl fmt::Display for Url {
    /// Writes the URL text, honoring the caller's width, fill, alignment and
    /// precision flags (for example `{:>40}`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write!(f, "{}", ..)` would format the value with fresh default
        // options and silently drop the caller's flags; `pad` applies them.
        f.pad(self.as_str())
    }
}
// Unit tests for `Url` construction, accessors, conversions and derived traits.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Construction: accepted inputs ----

    #[test]
    fn test_new_valid_url() {
        assert!(Url::new("https://example.com").is_ok());
        assert!(Url::new("http://example.com").is_ok());
        assert!(Url::new("ftp://example.com").is_ok());
        assert!(Url::new("file://localhost/path/to/file").is_ok());
    }

    // ---- Construction: rejected inputs ----

    #[test]
    fn test_empty_url() {
        assert_eq!(Url::new(""), Err(UrlError::Empty));
    }

    #[test]
    fn test_too_long_url() {
        let long = format!("https://{}.com", "a".repeat(3000));
        assert!(long.len() > 2048);
        assert_eq!(Url::new(&long), Err(UrlError::TooLong(long.len())));
    }

    #[test]
    fn test_missing_scheme() {
        assert_eq!(Url::new("://example.com"), Err(UrlError::MissingScheme));
    }

    #[test]
    fn test_missing_scheme_separator() {
        assert_eq!(
            Url::new("https//example.com"),
            Err(UrlError::MissingSchemeSeparator)
        );
    }

    #[test]
    fn test_invalid_scheme() {
        // A scheme must start with a letter.
        assert_eq!(Url::new("123://example.com"), Err(UrlError::InvalidScheme));
    }

    #[test]
    fn test_missing_authority() {
        assert_eq!(Url::new("https://"), Err(UrlError::MissingAuthority));
    }

    // ---- Raw access ----

    #[test]
    fn test_as_str() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.as_str(), "https://example.com");
    }

    #[test]
    fn test_as_inner() {
        let url = Url::new("https://example.com").unwrap();
        let inner = url.as_inner();
        assert_eq!(inner.as_str(), "https://example.com");
    }

    #[test]
    fn test_into_inner() {
        let url = Url::new("https://example.com").unwrap();
        let inner = url.into_inner();
        assert_eq!(inner.as_str(), "https://example.com");
    }

    // ---- Component accessors ----

    #[test]
    fn test_scheme() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.scheme(), "https");
    }

    #[test]
    fn test_scheme_case_insensitive() {
        // The scheme is normalised to lowercase; the rest is left untouched.
        let url = Url::new("HTTPS://example.com").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.as_str(), "https://example.com");
    }

    #[test]
    fn test_host() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.host(), Some("example.com"));
    }

    #[test]
    fn test_host_with_port() {
        let url = Url::new("https://example.com:8080").unwrap();
        assert_eq!(url.host(), Some("example.com"));
    }

    #[test]
    fn test_port() {
        let url = Url::new("https://example.com:8080").unwrap();
        assert_eq!(url.port(), Some("8080"));
    }

    #[test]
    fn test_port_none() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.port(), None);
    }

    #[test]
    fn test_path() {
        let url = Url::new("https://example.com/path").unwrap();
        assert_eq!(url.path(), Some("/path"));
    }

    #[test]
    fn test_path_none() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.path(), None);
    }

    #[test]
    fn test_query() {
        let url = Url::new("https://example.com?query=value").unwrap();
        assert_eq!(url.query(), Some("query=value"));
    }

    #[test]
    fn test_query_none() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.query(), None);
    }

    #[test]
    fn test_fragment() {
        let url = Url::new("https://example.com#fragment").unwrap();
        assert_eq!(url.fragment(), Some("fragment"));
    }

    #[test]
    fn test_fragment_none() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(url.fragment(), None);
    }

    #[test]
    fn test_full_url() {
        let url = Url::new("https://example.com:8080/path?query=value#fragment").unwrap();
        assert_eq!(url.scheme(), "https");
        assert_eq!(url.host(), Some("example.com"));
        assert_eq!(url.port(), Some("8080"));
        assert_eq!(url.path(), Some("/path"));
        assert_eq!(url.query(), Some("query=value"));
        assert_eq!(url.fragment(), Some("fragment"));
    }

    // ---- Conversions ----

    #[test]
    fn test_try_from_str() {
        let url = Url::try_from("https://example.com").unwrap();
        assert_eq!(url.as_str(), "https://example.com");
    }

    #[test]
    fn test_from_url_to_string() {
        let url = Url::new("https://example.com").unwrap();
        let inner: heapless::String<2048> = url.into();
        assert_eq!(inner.as_str(), "https://example.com");
    }

    #[test]
    fn test_from_str() {
        let url: Url = "https://example.com".parse().unwrap();
        assert_eq!(url.as_str(), "https://example.com");
    }

    #[test]
    fn test_from_str_invalid() {
        assert!("".parse::<Url>().is_err());
        assert!("https://".parse::<Url>().is_err());
        assert!("://example.com".parse::<Url>().is_err());
    }

    // ---- Formatting and derived traits ----

    #[test]
    fn test_display() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(format!("{url}"), "https://example.com");
    }

    #[test]
    fn test_equality() {
        let url1 = Url::new("https://example.com").unwrap();
        let url2 = Url::new("https://example.com").unwrap();
        let url3 = Url::new("https://example.org").unwrap();
        assert_eq!(url1, url2);
        assert_ne!(url1, url3);
    }

    #[test]
    fn test_ordering() {
        let url1 = Url::new("https://a.com").unwrap();
        let url2 = Url::new("https://b.com").unwrap();
        assert!(url1 < url2);
    }

    #[test]
    fn test_clone() {
        let url = Url::new("https://example.com").unwrap();
        let url2 = url.clone();
        assert_eq!(url, url2);
    }

    // ---- Scheme punctuation: `+`, `.` and `-` are allowed after the first letter ----

    #[test]
    fn test_scheme_with_plus() {
        assert!(Url::new("git+ssh://example.com").is_ok());
    }

    #[test]
    fn test_scheme_with_dot() {
        // The scheme must actually contain a `.` for this test to cover it.
        assert!(Url::new("web.scheme://example.com").is_ok());
    }

    #[test]
    fn test_scheme_with_dash() {
        assert!(Url::new("custom-scheme://example.com").is_ok());
    }

    #[test]
    fn test_error_display() {
        assert_eq!(format!("{}", UrlError::Empty), "URL cannot be empty");
        assert_eq!(
            format!("{}", UrlError::TooLong(3000)),
            "URL exceeds maximum length of 2048 characters (got 3000)"
        );
        assert_eq!(
            format!("{}", UrlError::MissingScheme),
            "URL must contain a scheme"
        );
        assert_eq!(
            format!("{}", UrlError::InvalidScheme),
            "URL scheme is invalid"
        );
        assert_eq!(
            format!("{}", UrlError::MissingSchemeSeparator),
            "URL scheme must be followed by :"
        );
        assert_eq!(
            format!("{}", UrlError::MissingAuthority),
            "URL has // but no authority"
        );
        assert_eq!(format!("{}", UrlError::InvalidHost), "URL host is invalid");
        assert_eq!(format!("{}", UrlError::InvalidPort), "URL port is invalid");
        assert_eq!(
            format!("{}", UrlError::InvalidChar(' ')),
            "URL contains invalid character ' '"
        );
    }

    #[test]
    fn test_hash() {
        use core::hash::Hash;
        use core::hash::Hasher;

        // Minimal deterministic hasher so the test does not depend on any
        // particular standard-library hasher.
        #[derive(Default)]
        struct SimpleHasher(u64);

        impl Hasher for SimpleHasher {
            fn finish(&self) -> u64 {
                self.0
            }

            fn write(&mut self, bytes: &[u8]) {
                for byte in bytes {
                    self.0 = self.0.wrapping_mul(31).wrapping_add(u64::from(*byte));
                }
            }
        }

        let url1 = Url::new("https://example.com").unwrap();
        let url2 = Url::new("https://example.com").unwrap();
        let url3 = Url::new("https://example.org").unwrap();
        let mut hasher1 = SimpleHasher::default();
        let mut hasher2 = SimpleHasher::default();
        let mut hasher3 = SimpleHasher::default();
        url1.hash(&mut hasher1);
        url2.hash(&mut hasher2);
        url3.hash(&mut hasher3);
        assert_eq!(hasher1.finish(), hasher2.finish());
        assert_ne!(hasher1.finish(), hasher3.finish());
    }

    #[test]
    fn test_debug() {
        let url = Url::new("https://example.com").unwrap();
        assert_eq!(format!("{url:?}"), "Url(\"https://example.com\")");
    }

    #[test]
    fn test_from_into_inner_roundtrip() {
        let url = Url::new("https://example.com").unwrap();
        let inner: heapless::String<2048> = url.into();
        let url2 = Url::new(inner.as_str()).unwrap();
        assert_eq!(url2.as_str(), "https://example.com");
    }

    // ---- Miscellaneous shapes ----

    #[test]
    fn test_url_without_authority() {
        let url = Url::new("mailto:user@example.com").unwrap();
        assert_eq!(url.scheme(), "mailto");
        assert_eq!(url.host(), None);
    }

    #[test]
    fn test_ip_host() {
        assert!(Url::new("https://127.0.0.1").is_ok());
    }

    #[test]
    fn test_percent_encoding() {
        assert!(Url::new("https://example.com/path%20with%20spaces").is_ok());
    }
}