1use safe_regex::{Matcher3, Matcher9, regex};
2use std::fmt::{Display, Formatter};
3use std::net::IpAddr;
4
/// Returns the uppercase hexadecimal digit (`'0'..='9'`, `'A'..='F'`) for a nibble value.
///
/// # Panics
///
/// Panics when `b` is 16 or greater.
#[must_use]
pub fn upper_hex_char(b: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    assert!(b < 16, "cannot convert number to hex: {b}");
    char::from(DIGITS[usize::from(b)])
}
15
/// Converts one ASCII hexadecimal digit (either case) into its numeric value.
///
/// Returns `None` for any byte that is not `0-9`, `a-f`, or `A-F`.
#[must_use]
pub fn from_hex_byte(b: u8) -> Option<u8> {
    // Pick the byte that maps to zero within the digit's range, then subtract it.
    let zero_point = match b {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(b - zero_point)
}
25
26pub fn percent_decode(bytes: impl AsRef<[u8]>) -> String {
27 let mut result_bytes: Vec<u8> = Vec::new();
29 let bytes = bytes.as_ref();
30 let mut n = 0;
31 while n < bytes.len() {
32 match bytes[n] {
33 b'%' if n + 2 < bytes.len() => {
34 let opt_d1 = from_hex_byte(bytes[n + 1]);
35 let opt_d0 = from_hex_byte(bytes[n + 2]);
36 match (opt_d1, opt_d0) {
37 (Some(d1), Some(d0)) => {
38 let b = (d1 << 4) | d0;
39 result_bytes.push(b);
40 n += 2;
41 }
42 _ => result_bytes.push(b'%'),
43 }
44 }
45 c => result_bytes.push(c),
46 }
47 n += 1;
48 }
49 String::from_utf8_lossy(&result_bytes).to_string()
50}
51
/// Selects the escaping rules that `percent_encode` applies.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PercentEncodePurpose {
    /// Escaping rules for the fragment component.
    Fragment,
    /// Escaping rules for the path component.
    Path,
    /// Escaping rules for the user-info component.
    UserInfo,
}
58
59#[allow(clippy::match_same_arms)]
60pub fn percent_encode(s: impl AsRef<str>, purpose: PercentEncodePurpose) -> String {
61 let mut result = String::new();
63 for c in s.as_ref().chars() {
64 let is_reserved = match (purpose, c) {
65 (PercentEncodePurpose::Fragment, _) => true,
66 (
68 PercentEncodePurpose::Path,
69 '-'
70 | '.'
71 | '_'
72 | '~'
73 | 'a'..='z'
74 | 'A'..='Z'
75 | '0'..='9'
76 | '!'
77 | '$'
78 | '&'
79 | '\''
80 | '('
81 | ')'
82 | '*'
83 | ','
84 | ';'
85 | '='
86 | ':'
87 | '@'
88 | '/',
89 ) => false,
90 (PercentEncodePurpose::Path, _) => true,
91 (PercentEncodePurpose::UserInfo, '@' | '?' | '#') => true,
92 (PercentEncodePurpose::UserInfo, _) => false,
93 };
94 if !is_reserved && c.is_ascii() {
95 result.push(c);
96 } else {
97 let mut buf = [0; 4];
98 let c_str = c.encode_utf8(&mut buf);
99 for b in c_str.as_bytes() {
100 let d1 = *b >> 4;
101 let d0 = *b & 0xf;
102 result.push('%');
103 result.push(upper_hex_char(d1));
104 result.push(upper_hex_char(d0));
105 }
106 }
107 }
108 result
109}
110
/// Reasons why `Url::parse_absolute` or `Url::parse_relative` can reject its input.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UrlParseError {
    /// The input does not match the URL grammar used by the parser.
    MalformedUrl,
    /// The port has more than five digits or does not fit in a `u16`.
    PortOutOfRange,
    /// The host looks like an IP literal but could not be parsed as an IP address.
    InvalidIpAddress,
    /// The bracketed host literal starts with `v`, i.e. names an IP version the parser
    /// does not handle.
    UnknownIpVersion,
}
118
/// The components of a parsed URL.
///
/// The field order is significant: the derived `Ord`/`PartialOrd` compare fields in
/// declaration order.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Url {
    /// Scheme without the trailing `://`; empty for relative URLs.
    pub scheme: String,
    /// User-info part exactly as it appeared in the input (not percent-decoded); empty
    /// when absent.
    pub user: String,
    /// Host name; empty when the host was given as an IP literal or the URL is relative.
    pub host: String,
    /// Parsed address when the host was given as an IP literal.
    pub ip: Option<IpAddr>,
    /// Port number, when one was given.
    pub port: Option<u16>,
    /// Percent-decoded path.
    pub path: String,
    /// Query string without the leading `?`, exactly as it appeared in the input.
    pub query: String,
    /// Fragment without the leading `#`, exactly as it appeared in the input.
    pub fragment: String,
}
130impl Url {
131 #[allow(clippy::missing_panics_doc)]
134 pub fn parse_absolute(url_s: impl AsRef<[u8]>) -> Result<Self, UrlParseError> {
135 let orig_bytes = url_s.as_ref();
178 let matcher: Matcher9<_> = regex!(br"([-.+0-9A-Za-z]+)://(?:([-._~a-zA-Z0-9%!$&'()*,;=:]*)@)?(?:([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|(\[[-._~a-zA-Z0-9%!$&'()*,;=:]+])|([-._~a-zA-Z0-9%!$&'()*,;=]+))(?::([0-9]*))?(/[-._~a-zA-Z0-9%!$&'()*,;=:@/]*)?(?:\?([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?(?:#([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?");
179 let (
180 scheme_bytes,
181 user_bytes,
182 ipv4_bytes,
183 ipv6_bytes,
184 host_bytes,
185 port_bytes,
186 path_bytes,
187 query_bytes,
188 fragment_bytes,
189 ) = matcher
190 .match_slices(orig_bytes)
191 .ok_or(UrlParseError::MalformedUrl)?;
192 let scheme = std::str::from_utf8(scheme_bytes).unwrap().to_string();
193 let user = std::str::from_utf8(user_bytes).unwrap().to_string();
194 let ip: Option<IpAddr> = if !ipv4_bytes.is_empty() {
195 Some(
196 std::str::from_utf8(ipv4_bytes)
197 .unwrap()
198 .parse::<IpAddr>()
199 .map_err(|_| UrlParseError::InvalidIpAddress)?,
200 )
201 } else if !ipv6_bytes.is_empty() {
202 let b = &ipv6_bytes[1..(ipv6_bytes.len() - 1)];
203 if b[0] == b'v' {
204 return Err(UrlParseError::UnknownIpVersion);
205 }
206 Some(
207 std::str::from_utf8(b)
208 .unwrap()
209 .parse::<IpAddr>()
210 .map_err(|_| UrlParseError::InvalidIpAddress)?,
211 )
212 } else {
213 None
214 };
215 let host = std::str::from_utf8(host_bytes).unwrap().to_string();
216 let port: Option<u16> = match port_bytes.len() {
217 0 => None,
218 1..6 => Some(
219 std::str::from_utf8(port_bytes)
220 .unwrap()
221 .parse::<u32>()
222 .unwrap()
223 .try_into()
224 .map_err(|_| UrlParseError::PortOutOfRange)?,
225 ),
226 _ => return Err(UrlParseError::PortOutOfRange),
227 };
228 let path = percent_decode(std::str::from_utf8(path_bytes).unwrap());
229 let query = std::str::from_utf8(query_bytes).unwrap().to_string();
230 let fragment = std::str::from_utf8(fragment_bytes).unwrap().to_string();
231 Ok(Self {
232 scheme,
233 user,
234 host,
235 ip,
236 port,
237 path,
238 query,
239 fragment,
240 })
241 }
242
243 #[allow(clippy::missing_panics_doc)]
246 pub fn parse_relative(url_s: impl AsRef<[u8]>) -> Result<Self, UrlParseError> {
247 let orig_bytes = url_s.as_ref();
268 let matcher: Matcher3<_> = regex!(br"([-._~a-zA-Z0-9%!$&'()*,;=:@/]*)?(?:\?([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?(?:#([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?");
269 let (path_bytes, query_bytes, fragment_bytes) = matcher
270 .match_slices(orig_bytes)
271 .ok_or(UrlParseError::MalformedUrl)?;
272 let path = percent_decode(std::str::from_utf8(path_bytes).unwrap());
273 let query = std::str::from_utf8(query_bytes).unwrap().to_string();
274 let fragment = std::str::from_utf8(fragment_bytes).unwrap().to_string();
275 Ok(Self {
276 scheme: String::new(),
277 user: String::new(),
278 host: String::new(),
279 ip: None,
280 port: None,
281 path,
282 query,
283 fragment,
284 })
285 }
286}
287impl Display for Url {
288 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
289 if !self.scheme.is_empty() {
290 write!(f, "{}://", self.scheme)?;
291 }
292 if !self.user.is_empty() {
293 write!(f, "{}@", self.user)?;
294 }
295 if let Some(ip) = self.ip {
296 write!(f, "{ip}")?;
297 } else {
298 write!(f, "{}", self.host)?;
299 }
300 if let Some(port) = self.port {
301 write!(f, ":{port}")?;
302 }
303 write!(
304 f,
305 "{}",
306 percent_encode(&self.path, PercentEncodePurpose::Path)
307 )?;
308 if !self.query.is_empty() {
309 write!(f, "?{}", self.query)?;
310 }
311 if !self.fragment.is_empty() {
312 write!(f, "#{}", self.fragment)?;
313 }
314 Ok(())
315 }
316}