1use std::fmt;
4use std::net::{Ipv4Addr, Ipv6Addr};
5use std::str::FromStr;
6
7use http::Uri;
8
/// Error produced when a URL cannot be parsed or rebuilt.
///
/// The error carries a single human-readable message; that message is exactly
/// what the `Display` implementation prints.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    message: String,
}

impl ParseError {
    /// Creates an error from anything that converts into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl fmt::Display for ParseError {
    /// Writes the stored message verbatim (formatter width/padding flags are not applied).
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(self.message.as_str())
    }
}

impl std::error::Error for ParseError {}
30
/// Host component of a URL authority.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Host {
    /// A registered name, stored as given to the variant.
    Domain(String),
    /// An IPv4 address literal.
    Ipv4(Ipv4Addr),
    /// An IPv6 address literal (held without the surrounding brackets).
    Ipv6(Ipv6Addr),
}

impl fmt::Display for Host {
    /// Prints the domain text or the address in its standard textual form.
    ///
    /// IPv6 addresses are printed without brackets.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Domain(name) => out.write_str(name.as_str()),
            Self::Ipv4(ip) => write!(out, "{ip}"),
            Self::Ipv6(ip) => write!(out, "{ip}"),
        }
    }
}
48
49#[derive(Clone, PartialEq, Eq)]
51pub struct Url {
52 inner: String,
53 uri: Uri,
54 host_str: Option<String>,
55}
56
57impl fmt::Debug for Url {
58 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59 f.debug_tuple("Url").field(&self.inner).finish()
60 }
61}
62
63impl fmt::Display for Url {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 f.write_str(self.as_str())
66 }
67}
68
69impl Url {
70 pub fn parse(input: &str) -> Result<Self, ParseError> {
71 let without_fragment = input.split('#').next().unwrap_or(input);
72 if let Some(authority) = extract_authority(without_fragment) {
73 validate_authority(authority)?;
74 }
75 let uri = Uri::try_from(without_fragment)
76 .map_err(|err| ParseError::new(format!("invalid URI: {err}")))?;
77 if uri.scheme().is_none() {
78 return Err(ParseError::new("relative URL without a base"));
79 }
80 let host_str = uri
81 .authority()
82 .and_then(|authority| host_str_from_authority(authority.as_str()).ok());
83 Ok(Self {
84 inner: without_fragment.to_string(),
85 uri,
86 host_str,
87 })
88 }
89
90 #[inline]
91 pub fn as_str(&self) -> &str {
92 &self.inner
93 }
94
95 #[inline]
96 pub fn scheme(&self) -> &str {
97 self.uri.scheme_str().unwrap_or("")
98 }
99
100 #[inline]
101 pub fn path(&self) -> &str {
102 self.uri.path()
103 }
104
105 #[inline]
106 pub fn query(&self) -> Option<&str> {
107 self.uri.query()
108 }
109
110 #[inline]
111 pub fn port(&self) -> Option<u16> {
112 self.uri.port_u16()
113 }
114
115 #[inline]
116 pub fn port_or_known_default(&self) -> Option<u16> {
117 self.port().or_else(|| known_default_port(self.scheme()))
118 }
119
120 pub fn host(&self) -> Option<Host> {
121 let authority = self.uri.authority()?.as_str();
122 parse_host_port(authority).ok().map(|(host, _)| host)
123 }
124
125 #[inline]
126 pub fn host_str(&self) -> Option<&str> {
127 self.host_str.as_deref()
128 }
129
130 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ParseError> {
131 if scheme.is_empty() || !scheme.bytes().all(is_scheme_byte) {
132 return Err(ParseError::new("invalid URL scheme"));
133 }
134 let authority = self
135 .uri
136 .authority()
137 .map(|a| a.as_str())
138 .unwrap_or("")
139 .to_string();
140 let path_and_query = path_and_query_of(&self.uri);
141 *self = Self::assemble(scheme, &authority, &path_and_query)?;
142 Ok(())
143 }
144
145 pub fn set_port(&mut self, port: Option<u16>) -> Result<(), ParseError> {
146 let scheme = self.scheme();
147 let authority = format_authority_host(self.host(), port)?;
148 let path_and_query = path_and_query_of(&self.uri);
149 *self = Self::assemble(scheme, &authority, &path_and_query)?;
150 Ok(())
151 }
152
153 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
154 let host = host.ok_or_else(|| ParseError::new("missing URL host"))?;
155 validate_authority(host)?;
156 let scheme = self.scheme();
157 let host = parse_host_label(host)?;
158 let authority = format_authority_host(Some(host), self.port())?;
159 let path_and_query = path_and_query_of(&self.uri);
160 *self = Self::assemble(scheme, &authority, &path_and_query)?;
161 Ok(())
162 }
163
164 pub fn set_query(&mut self, query: Option<&str>) -> Result<(), ParseError> {
165 let scheme = self.scheme();
166 let authority = self
167 .uri
168 .authority()
169 .map(|a| a.as_str())
170 .unwrap_or("")
171 .to_string();
172 let mut path_and_query = self.path().to_string();
173 if let Some(q) = query {
174 path_and_query.push('?');
175 path_and_query.push_str(q);
176 }
177 *self = Self::assemble(scheme, &authority, &path_and_query)?;
178 Ok(())
179 }
180
181 pub fn join(&self, reference: &str) -> Result<Self, ParseError> {
183 let reference = reference.split('#').next().unwrap_or(reference);
184 if scheme_end(reference).is_some() {
185 return Self::parse(reference);
186 }
187
188 let base_scheme = self.scheme();
189 let base_authority = self.uri.authority().map(|a| a.as_str()).unwrap_or("");
190 let base_path = self.path();
191 let base_query = self.query();
192
193 if let Some(rest) = reference.strip_prefix("//") {
194 let (authority, path, query) = split_authority_reference(rest)?;
195 let path = normalize_path(&path);
196 return Self::assemble_with_query(base_scheme, &authority, &path, query.as_deref());
197 }
198
199 if reference.starts_with('/') {
200 let (path, query) = split_path_query(reference);
201 let path = normalize_path(path);
202 return Self::assemble_with_query(base_scheme, base_authority, &path, query.as_deref());
203 }
204
205 if let Some(query) = reference.strip_prefix('?') {
206 return Self::assemble_with_query(base_scheme, base_authority, base_path, Some(query));
207 }
208
209 if reference.is_empty() {
210 return Self::assemble_with_query(base_scheme, base_authority, base_path, base_query);
211 }
212
213 let (ref_path, ref_query) = split_path_query(reference);
214 let merged_path = normalize_path(&merge_paths(base_path, ref_path));
215 Self::assemble_with_query(
216 base_scheme,
217 base_authority,
218 &merged_path,
219 ref_query.as_deref(),
220 )
221 }
222
223 fn assemble(scheme: &str, authority: &str, path_and_query: &str) -> Result<Self, ParseError> {
224 let (path, query) = split_path_query(path_and_query);
225 Self::assemble_with_query(scheme, authority, path, query.as_deref())
226 }
227
228 fn assemble_with_query(
229 scheme: &str,
230 authority: &str,
231 path: &str,
232 query: Option<&str>,
233 ) -> Result<Self, ParseError> {
234 let path = if path.is_empty() { "/" } else { path };
235 let mut inner = format!("{scheme}://");
236 if !authority.is_empty() {
237 inner.push_str(authority);
238 }
239 inner.push_str(path);
240 if let Some(query) = query {
241 inner.push('?');
242 inner.push_str(query);
243 }
244 Self::parse(&inner)
245 }
246}
247
248fn path_and_query_of(uri: &Uri) -> String {
249 match uri.path_and_query() {
250 Some(pq) => pq.as_str().to_string(),
251 None => "/".to_string(),
252 }
253}
254
/// Returns the well-known default port for `scheme`, if one is known.
///
/// Recognises `http`/`ws` (80) and `https`/`wss` (443). URI schemes are
/// case-insensitive, so the comparison ignores ASCII case: `"WSS"` and
/// `"wss"` both yield 443.
fn known_default_port(scheme: &str) -> Option<u16> {
    const DEFAULT_PORTS: [(&str, u16); 4] = [
        ("http", 80),
        ("ws", 80),
        ("https", 443),
        ("wss", 443),
    ];
    DEFAULT_PORTS
        .iter()
        .find(|(name, _)| scheme.eq_ignore_ascii_case(name))
        .map(|&(_, port)| port)
}
262
/// Returns `true` for bytes allowed in a URI scheme: ASCII letters and digits,
/// `+`, `-` and `.`.
fn is_scheme_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'-' | b'.')
}

/// Returns the byte offset of the `:` that terminates a leading scheme.
///
/// A scheme is an ASCII letter followed by any number of scheme bytes (see
/// `is_scheme_byte`). Returns `None` when `input` does not start with a
/// scheme followed by `:`.
fn scheme_end(input: &str) -> Option<usize> {
    // Work on bytes: every valid scheme character is ASCII, and any byte of a
    // multi-byte UTF-8 sequence is >= 0x80, so it can never be mistaken for a
    // scheme character (a `char as u8` cast would truncate and misclassify).
    let bytes = input.as_bytes();
    if !bytes.first()?.is_ascii_alphabetic() {
        return None;
    }
    for (idx, &byte) in bytes.iter().enumerate().skip(1) {
        if byte == b':' {
            return Some(idx);
        }
        if !is_scheme_byte(byte) {
            return None;
        }
    }
    None
}
287
/// Returns the text between the first `://` and the next `/` or `?` (or the
/// end of `input`).
///
/// Yields `None` when `input` contains no `://` or when that text is empty.
fn extract_authority(input: &str) -> Option<&str> {
    let (_, rest) = input.split_once("://")?;
    let end = rest
        .find(|c: char| c == '/' || c == '?')
        .unwrap_or(rest.len());
    let authority = &rest[..end];
    if authority.is_empty() {
        None
    } else {
        Some(authority)
    }
}
296
297fn validate_authority(authority: &str) -> Result<(), ParseError> {
298 if authority.contains('@') {
299 return Err(ParseError::new(
300 "userinfo in URL authority is not supported",
301 ));
302 }
303 if !authority.is_ascii() {
304 return Err(ParseError::new("non-ASCII host requires explicit punycode"));
305 }
306 Ok(())
307}
308
309fn host_str_from_authority(authority: &str) -> Result<String, ParseError> {
310 let (host, _) = parse_host_port(authority)?;
311 Ok(match host {
312 Host::Domain(domain) => domain,
313 Host::Ipv4(addr) => addr.to_string(),
314 Host::Ipv6(addr) => addr.to_string(),
315 })
316}
317
318fn parse_host_port(authority: &str) -> Result<(Host, Option<u16>), ParseError> {
319 if authority.is_empty() {
320 return Err(ParseError::new("missing URL host"));
321 }
322
323 if authority.starts_with('[') {
324 let end = authority
325 .find(']')
326 .ok_or_else(|| ParseError::new("invalid IPv6 authority"))?;
327 let ip = Ipv6Addr::from_str(&authority[1..end])
328 .map_err(|_| ParseError::new("invalid IPv6 address"))?;
329 let port = parse_port_suffix(&authority[end + 1..])?;
330 return Ok((Host::Ipv6(ip), port));
331 }
332
333 if let Some((host, port)) = authority.rsplit_once(':') {
334 if !host.is_empty() && port.chars().all(|c| c.is_ascii_digit()) {
335 let port = port
336 .parse::<u16>()
337 .map_err(|_| ParseError::new("invalid port"))?;
338 return Ok((parse_host_label(host)?, Some(port)));
339 }
340 }
341
342 Ok((parse_host_label(authority)?, None))
343}
344
345fn parse_host_label(host: &str) -> Result<Host, ParseError> {
346 if let Ok(ip) = Ipv4Addr::from_str(host) {
347 return Ok(Host::Ipv4(ip));
348 }
349 Ok(Host::Domain(host.to_ascii_lowercase()))
350}
351
352fn parse_port_suffix(suffix: &str) -> Result<Option<u16>, ParseError> {
353 if suffix.is_empty() {
354 return Ok(None);
355 }
356 if !suffix.starts_with(':') {
357 return Err(ParseError::new("invalid port suffix"));
358 }
359 suffix[1..]
360 .parse::<u16>()
361 .map(Some)
362 .map_err(|_| ParseError::new("invalid port"))
363}
364
365fn format_authority_host(host: Option<Host>, port: Option<u16>) -> Result<String, ParseError> {
366 let host = host.ok_or_else(|| ParseError::new("missing URL host"))?;
367 let mut authority = match host {
368 Host::Domain(domain) => domain,
369 Host::Ipv4(addr) => addr.to_string(),
370 Host::Ipv6(addr) => format!("[{addr}]"),
371 };
372 if let Some(port) = port {
373 authority.push(':');
374 authority.push_str(&port.to_string());
375 }
376 Ok(authority)
377}
378
/// Splits `input` at its first `?` into a borrowed path and an owned query.
///
/// The `?` itself is dropped. Without a `?`, the whole input is the path and
/// the query is `None`; a trailing `?` yields `Some("")`.
fn split_path_query(input: &str) -> (&str, Option<String>) {
    if let Some(mark) = input.find('?') {
        let query = input[mark + 1..].to_string();
        (&input[..mark], Some(query))
    } else {
        (input, None)
    }
}
385
386fn split_authority_reference(input: &str) -> Result<(String, String, Option<String>), ParseError> {
387 let authority_end = input
388 .find('/')
389 .or_else(|| input.find('?'))
390 .unwrap_or(input.len());
391 let authority = &input[..authority_end];
392 let rest = &input[authority_end..];
393
394 if authority.is_empty() {
395 return Err(ParseError::new("missing authority in reference"));
396 }
397
398 let (path, query) = if rest.is_empty() {
399 ("/".to_string(), None)
400 } else if let Some(query) = rest.strip_prefix('?') {
401 ("/".to_string(), Some(query.to_string()))
402 } else {
403 let (path, query) = split_path_query(&rest[1..]);
404 let path = if path.is_empty() {
405 "/".to_string()
406 } else {
407 format!("/{path}")
408 };
409 (path, query)
410 };
411
412 Ok((authority.to_string(), path, query))
413}
414
/// Merges a relative reference path onto a base path.
///
/// Keeps the base path up to and including its last `/` and appends
/// `reference_path`. When the base path contains no `/`, the result is just
/// `reference_path`.
fn merge_paths(base_path: &str, reference_path: &str) -> String {
    let keep = base_path.rfind('/').map_or(0, |slash| slash + 1);
    let mut merged = String::with_capacity(keep + reference_path.len());
    merged.push_str(&base_path[..keep]);
    merged.push_str(reference_path);
    merged
}
423
424fn normalize_path(path: &str) -> String {
425 let (path_only, query) = split_path_query(path);
426 let normalized = remove_dot_segments(path_only);
427 match query {
428 Some(query) => format!("{normalized}?{query}"),
429 None => normalized,
430 }
431}
432
/// Removes `.` and `..` segments from `path`.
///
/// The input is consumed from the front while completed segments accumulate
/// in the output buffer:
///
/// * a leading `../` or `./` is dropped;
/// * `/./` (or a final `/.`) collapses to `/`;
/// * `/../` (or a final `/..`) collapses to `/` and discards the last segment
///   already written to the output;
/// * a lone `.` or `..` is dropped;
/// * otherwise the next segment (with its leading `/`, if any) is moved to
///   the output.
///
/// The input is advanced by re-slicing rather than by building a new string
/// on every step, so the work is linear in the length of `path`.
fn remove_dot_segments(path: &str) -> String {
    let mut input = path;
    let mut output = String::with_capacity(path.len());

    while !input.is_empty() {
        if let Some(rest) = input
            .strip_prefix("../")
            .or_else(|| input.strip_prefix("./"))
        {
            input = rest;
        } else if input.starts_with("/./") {
            // Keep the trailing slash of the matched prefix.
            input = &input[2..];
        } else if input == "/." {
            input = "/";
        } else if input.starts_with("/../") {
            // Keep the trailing slash of the matched prefix.
            input = &input[3..];
            pop_last_segment(&mut output);
        } else if input == "/.." {
            input = "/";
            pop_last_segment(&mut output);
        } else if input == "." || input == ".." {
            input = "";
        } else {
            // Move one segment: an optional leading `/` plus everything up to
            // (not including) the next `/`.
            let start = usize::from(input.starts_with('/'));
            let end = input[start..]
                .find('/')
                .map_or(input.len(), |idx| start + idx);
            output.push_str(&input[..end]);
            input = &input[end..];
        }
    }

    output
}

/// Removes the last segment of `output` together with the `/` that precedes
/// it; clears `output` entirely when it contains no `/`.
fn pop_last_segment(output: &mut String) {
    match output.rfind('/') {
        Some(slash) => output.truncate(slash),
        None => output.clear(),
    }
}
482
#[cfg(test)]
mod unit_tests {
    use super::*;

    /// A raw non-ASCII character in the host must be refused at parse time.
    #[test]
    fn rejects_non_ascii_authority() {
        let message = Url::parse("https://exämple.com/")
            .unwrap_err()
            .to_string();
        assert!(message.contains("non-ASCII"));
    }

    /// An authority carrying `user:pass@` must be refused at parse time.
    #[test]
    fn rejects_userinfo() {
        let message = Url::parse("http://user:pass@host/")
            .unwrap_err()
            .to_string();
        assert!(message.contains("userinfo"));
    }
}