servlin/
url.rs

1use safe_regex::{Matcher3, Matcher9, regex};
2use std::fmt::{Display, Formatter};
3use std::net::IpAddr;
4
/// Returns the uppercase hexadecimal digit (`'0'..='9'`, `'A'..='F'`) for a
/// four-bit value.
///
/// # Panics
/// Panics when `b` is not in `0..=15`.
#[must_use]
pub fn upper_hex_char(b: u8) -> char {
    match b {
        // Decimal digits map straight onto ASCII '0'..'9'.
        0..=9 => char::from(b'0' + b),
        // Ten through fifteen map onto ASCII 'A'..'F'.
        10..=15 => char::from(b'A' + (b - 10)),
        _ => panic!("cannot convert number to hex: {b}"),
    }
}
15
/// Returns the numeric value (`0..=15`) of an ASCII hexadecimal digit byte.
///
/// Both lowercase and uppercase letters are accepted. Any other byte yields
/// `None`.
#[must_use]
pub fn from_hex_byte(b: u8) -> Option<u8> {
    // `char::from(u8)` maps the byte to U+0000..=U+00FF, and `to_digit(16)`
    // only recognises the ASCII digits and letters a-f / A-F.
    let value = char::from(b).to_digit(16)?;
    u8::try_from(value).ok()
}
25
26pub fn percent_decode(bytes: impl AsRef<[u8]>) -> String {
27    // https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
28    let mut result_bytes: Vec<u8> = Vec::new();
29    let bytes = bytes.as_ref();
30    let mut n = 0;
31    while n < bytes.len() {
32        match bytes[n] {
33            b'%' if n + 2 < bytes.len() => {
34                let opt_d1 = from_hex_byte(bytes[n + 1]);
35                let opt_d0 = from_hex_byte(bytes[n + 2]);
36                match (opt_d1, opt_d0) {
37                    (Some(d1), Some(d0)) => {
38                        let b = (d1 << 4) | d0;
39                        result_bytes.push(b);
40                        n += 2;
41                    }
42                    _ => result_bytes.push(b'%'),
43                }
44            }
45            c => result_bytes.push(c),
46        }
47        n += 1;
48    }
49    String::from_utf8_lossy(&result_bytes).to_string()
50}
51
/// Selects which URL component `percent_encode` is producing text for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PercentEncodePurpose {
    /// The part after `#`.
    Fragment,
    /// The path component.
    Path,
    /// The user-information part that precedes `@` in the authority.
    UserInfo,
}
58
59#[allow(clippy::match_same_arms)]
60pub fn percent_encode(s: impl AsRef<str>, purpose: PercentEncodePurpose) -> String {
61    // https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
62    let mut result = String::new();
63    for c in s.as_ref().chars() {
64        let is_reserved = match (purpose, c) {
65            (PercentEncodePurpose::Fragment, _) => true,
66            // path-abempty
67            (
68                PercentEncodePurpose::Path,
69                '-'
70                | '.'
71                | '_'
72                | '~'
73                | 'a'..='z'
74                | 'A'..='Z'
75                | '0'..='9'
76                | '!'
77                | '$'
78                | '&'
79                | '\''
80                | '('
81                | ')'
82                | '*'
83                | ','
84                | ';'
85                | '='
86                | ':'
87                | '@'
88                | '/',
89            ) => false,
90            (PercentEncodePurpose::Path, _) => true,
91            (PercentEncodePurpose::UserInfo, '@' | '?' | '#') => true,
92            (PercentEncodePurpose::UserInfo, _) => false,
93        };
94        if !is_reserved && c.is_ascii() {
95            result.push(c);
96        } else {
97            let mut buf = [0; 4];
98            let c_str = c.encode_utf8(&mut buf);
99            for b in c_str.as_bytes() {
100                let d1 = *b >> 4;
101                let d0 = *b & 0xf;
102                result.push('%');
103                result.push(upper_hex_char(d1));
104                result.push(upper_hex_char(d0));
105            }
106        }
107    }
108    result
109}
110
/// Reasons why `Url::parse_absolute` or `Url::parse_relative` can fail.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UrlParseError {
    /// The input does not match the URL grammar the parser accepts.
    MalformedUrl,
    /// The port is not representable as a `u16`.
    PortOutOfRange,
    /// The host looks like an IP address but is not a valid one.
    InvalidIpAddress,
    /// The host is a bracketed literal of the `v...` (IPvFuture) form.
    UnknownIpVersion,
}
118
/// The components of a parsed URL.
///
/// Field order is significant: the derived `Ord`/`PartialOrd` compare fields
/// in declaration order.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Url {
    /// Scheme without the trailing `://`; empty for relative URLs.
    pub scheme: String,
    /// User-information part as it appears in the URL; empty when absent.
    pub user: String,
    /// Registered host name; left empty by the parser when the host is an IP
    /// address.
    pub host: String,
    /// Parsed IP address when the host is an IP address.
    pub ip: Option<IpAddr>,
    /// Port number, when one is given.
    pub port: Option<u16>,
    /// Path; the parser stores it percent-decoded.
    pub path: String,
    /// Query without the leading `?`; the parser stores it as it appears in
    /// the URL.
    pub query: String,
    /// Fragment without the leading `#`; the parser stores it as it appears
    /// in the URL.
    pub fragment: String,
}
130impl Url {
131    /// # Errors
132    /// Returns an error when it fails to parse `url_s`.
133    #[allow(clippy::missing_panics_doc)]
134    pub fn parse_absolute(url_s: impl AsRef<[u8]>) -> Result<Self, UrlParseError> {
135        // https://datatracker.ietf.org/doc/html/rfc3986
136        // https://datatracker.ietf.org/doc/html/rfc7230#section-2.7
137        // URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
138        // hier-part     = "//" authority path-abempty
139        //               / path-absolute
140        //               / path-rootless
141        //               / path-empty
142        // authority     = [ userinfo "@" ] host [ ":" port ]
143        // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
144        // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
145        // pct-encoded   = "%" HEXDIG HEXDIG
146        // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
147        // host          = IP-literal / IPv4address / reg-name
148        // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
149        // IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
150        // IPv6address   =                            6( h16 ":" ) ls32
151        //               /                       "::" 5( h16 ":" ) ls32
152        //               / [               h16 ] "::" 4( h16 ":" ) ls32
153        //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
154        //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
155        //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
156        //               / [ *4( h16 ":" ) h16 ] "::"              ls32
157        //               / [ *5( h16 ":" ) h16 ] "::"              h16
158        //               / [ *6( h16 ":" ) h16 ] "::"
159        //       ls32    = ( h16 ":" h16 ) / IPv4address
160        //               ; least-significant 32 bits of address
161        //       h16     = 1*4HEXDIG
162        //               ; 16 bits of address represented in hexadecimal
163        // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
164        // reg-name      = *( unreserved / pct-encoded / sub-delims )
165        // port          = *DIGIT
166        // path-abempty  = *( "/" segment )
167        // path-absolute = "/" [ segment-nz *( "/" segment ) ]
168        // path-rootless = segment-nz *( "/" segment )
169        // path-empty    = 0<pchar>
170        // segment       = *pchar
171        // segment-nz    = 1*pchar
172        // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
173        //               ; non-zero-length segment without any colon ":"
174        // pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
175        // query         = *( pchar / "/" / "?" )
176        // fragment      = *( pchar / "/" / "?" )
177        let orig_bytes = url_s.as_ref();
178        let matcher: Matcher9<_> = regex!(br"([-.+0-9A-Za-z]+)://(?:([-._~a-zA-Z0-9%!$&'()*,;=:]*)@)?(?:([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})|(\[[-._~a-zA-Z0-9%!$&'()*,;=:]+])|([-._~a-zA-Z0-9%!$&'()*,;=]+))(?::([0-9]*))?(/[-._~a-zA-Z0-9%!$&'()*,;=:@/]*)?(?:\?([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?(?:#([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?");
179        let (
180            scheme_bytes,
181            user_bytes,
182            ipv4_bytes,
183            ipv6_bytes,
184            host_bytes,
185            port_bytes,
186            path_bytes,
187            query_bytes,
188            fragment_bytes,
189        ) = matcher
190            .match_slices(orig_bytes)
191            .ok_or(UrlParseError::MalformedUrl)?;
192        let scheme = std::str::from_utf8(scheme_bytes).unwrap().to_string();
193        let user = std::str::from_utf8(user_bytes).unwrap().to_string();
194        let ip: Option<IpAddr> = if !ipv4_bytes.is_empty() {
195            Some(
196                std::str::from_utf8(ipv4_bytes)
197                    .unwrap()
198                    .parse::<IpAddr>()
199                    .map_err(|_| UrlParseError::InvalidIpAddress)?,
200            )
201        } else if !ipv6_bytes.is_empty() {
202            let b = &ipv6_bytes[1..(ipv6_bytes.len() - 1)];
203            if b[0] == b'v' {
204                return Err(UrlParseError::UnknownIpVersion);
205            }
206            Some(
207                std::str::from_utf8(b)
208                    .unwrap()
209                    .parse::<IpAddr>()
210                    .map_err(|_| UrlParseError::InvalidIpAddress)?,
211            )
212        } else {
213            None
214        };
215        let host = std::str::from_utf8(host_bytes).unwrap().to_string();
216        let port: Option<u16> = match port_bytes.len() {
217            0 => None,
218            1..6 => Some(
219                std::str::from_utf8(port_bytes)
220                    .unwrap()
221                    .parse::<u32>()
222                    .unwrap()
223                    .try_into()
224                    .map_err(|_| UrlParseError::PortOutOfRange)?,
225            ),
226            _ => return Err(UrlParseError::PortOutOfRange),
227        };
228        let path = percent_decode(std::str::from_utf8(path_bytes).unwrap());
229        let query = std::str::from_utf8(query_bytes).unwrap().to_string();
230        let fragment = std::str::from_utf8(fragment_bytes).unwrap().to_string();
231        Ok(Self {
232            scheme,
233            user,
234            host,
235            ip,
236            port,
237            path,
238            query,
239            fragment,
240        })
241    }
242
243    /// # Errors
244    /// Returns an error when it fails to parse `url_s`.
245    #[allow(clippy::missing_panics_doc)]
246    pub fn parse_relative(url_s: impl AsRef<[u8]>) -> Result<Self, UrlParseError> {
247        // https://datatracker.ietf.org/doc/html/rfc3986
248        // https://datatracker.ietf.org/doc/html/rfc7230#section-2.7
249        // URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
250        // hier-part     = "//" authority path-abempty
251        //               / path-absolute
252        //               / path-rootless
253        //               / path-empty
254        // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
255        // pct-encoded   = "%" HEXDIG HEXDIG
256        // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
257        // path-absolute = "/" [ segment-nz *( "/" segment ) ]
258        // path-rootless = segment-nz *( "/" segment )
259        // path-empty    = 0<pchar>
260        // segment       = *pchar
261        // segment-nz    = 1*pchar
262        // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
263        //               ; non-zero-length segment without any colon ":"
264        // pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
265        // query         = *( pchar / "/" / "?" )
266        // fragment      = *( pchar / "/" / "?" )
267        let orig_bytes = url_s.as_ref();
268        let matcher: Matcher3<_> = regex!(br"([-._~a-zA-Z0-9%!$&'()*,;=:@/]*)?(?:\?([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?(?:#([-._~a-zA-Z0-9%!$&'()*,;=:@/?]*))?");
269        let (path_bytes, query_bytes, fragment_bytes) = matcher
270            .match_slices(orig_bytes)
271            .ok_or(UrlParseError::MalformedUrl)?;
272        let path = percent_decode(std::str::from_utf8(path_bytes).unwrap());
273        let query = std::str::from_utf8(query_bytes).unwrap().to_string();
274        let fragment = std::str::from_utf8(fragment_bytes).unwrap().to_string();
275        Ok(Self {
276            scheme: String::new(),
277            user: String::new(),
278            host: String::new(),
279            ip: None,
280            port: None,
281            path,
282            query,
283            fragment,
284        })
285    }
286}
287impl Display for Url {
288    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
289        if !self.scheme.is_empty() {
290            write!(f, "{}://", self.scheme)?;
291        }
292        if !self.user.is_empty() {
293            write!(f, "{}@", self.user)?;
294        }
295        if let Some(ip) = self.ip {
296            write!(f, "{ip}")?;
297        } else {
298            write!(f, "{}", self.host)?;
299        }
300        if let Some(port) = self.port {
301            write!(f, ":{port}")?;
302        }
303        write!(
304            f,
305            "{}",
306            percent_encode(&self.path, PercentEncodePurpose::Path)
307        )?;
308        if !self.query.is_empty() {
309            write!(f, "?{}", self.query)?;
310        }
311        if !self.fragment.is_empty() {
312            write!(f, "#{}", self.fragment)?;
313        }
314        Ok(())
315    }
316}