Skip to main content

veilid_tools/
split_url.rs

// Loose subset interpretation of the URL standard.
// Not using the full Url crate here, for no_std compatibility.
//
// Caveats:
//   No support for query string parsing
//   No support for paths with ';' parameters
//   URLs must convert to UTF-8
//   Only IP address and DNS hostname host fields are supported
9
10use super::*;
11
/// Returns `true` when `c` is an ASCII letter (`A-Z`, `a-z`) or an ASCII digit (`0-9`).
fn is_alphanum(c: u8) -> bool {
    c.is_ascii_alphanumeric()
}
/// Returns `true` when `c` is one of the punctuation "mark" bytes
/// `-`, `_`, `.`, `!`, `~`, `*`, `'`, `(` or `)`.
fn is_mark(c: u8) -> bool {
    const MARKS: &[u8] = b"-_.!~*'()";
    MARKS.contains(&c)
}
25fn is_unreserved(c: u8) -> bool {
26    is_alphanum(c) || is_mark(c)
27}
28
29fn must_encode_userinfo(c: u8) -> bool {
30    !(is_unreserved(c) || matches!(c, b'%' | b':' | b';' | b'&' | b'=' | b'+' | b'$' | b','))
31}
32
33fn must_encode_path(c: u8) -> bool {
34    !(is_unreserved(c)
35        || matches!(
36            c,
37            b'%' | b'/' | b':' | b'@' | b'&' | b'=' | b'+' | b'$' | b','
38        ))
39}
40
/// Checks whether the given string is an acceptable URL scheme.
///
/// A scheme is accepted when it is non-empty, starts with an ASCII letter,
/// and every following character is an ASCII letter, an ASCII digit,
/// `-`, `+` or `.`.
fn is_valid_scheme<H: AsRef<str>>(host: H) -> bool {
    let mut remaining = host.as_ref().chars();
    // The first character must exist and be alphabetic.
    match remaining.next() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }
    remaining.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '+' | '.'))
}
59
60fn hex_decode(h: u8) -> Result<u8, SplitUrlError> {
61    match h {
62        b'0'..=b'9' => Ok(h - b'0'),
63        b'A'..=b'F' => Ok(h - b'A' + 10),
64        b'a'..=b'f' => Ok(h - b'a' + 10),
65        _ => Err(SplitUrlError::new(
66            "Unexpected character in percent encoding",
67        )),
68    }
69}
70
/// Encodes a byte as two uppercase hexadecimal digits.
///
/// Returns `(high_nibble_digit, low_nibble_digit)`, e.g. `0x2F` -> `('2', 'F')`.
fn hex_encode(c: u8) -> (char, char) {
    // Indexing with a 4-bit value can never go out of bounds, and
    // `char::from(u8)` is infallible, so no unwrap/panic path is needed.
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    let high = HEX_DIGITS[usize::from(c >> 4)];
    let low = HEX_DIGITS[usize::from(c & 0x0F)];
    (char::from(high), char::from(low))
}
87
88fn url_decode<S: AsRef<str>>(s: S) -> Result<String, SplitUrlError> {
89    let url = s.as_ref().to_owned();
90    if !url.is_ascii() {
91        return Err(SplitUrlError::new("URL is not in ASCII encoding"));
92    }
93    let url_bytes = url.as_bytes();
94    let mut dec_bytes: Vec<u8> = Vec::with_capacity(url_bytes.len());
95    let mut i = 0;
96    let end = url_bytes.len();
97    while i < end {
98        let mut b = url_bytes[i];
99        i += 1;
100        if b == b'%' {
101            if (i + 1) >= end {
102                return Err(SplitUrlError::new("Invalid URL encoding"));
103            }
104            b = (hex_decode(url_bytes[i])? << 4) | hex_decode(url_bytes[i + 1])?;
105            i += 2;
106        }
107        dec_bytes.push(b);
108    }
109    String::from_utf8(dec_bytes)
110        .map_err(|e| SplitUrlError::new(format!("Decoded URL is not valid UTF-8: {}", e)))
111}
112
113fn url_encode<S: AsRef<str>>(s: S, must_encode: impl Fn(u8) -> bool) -> String {
114    let bytes = s.as_ref().as_bytes();
115    let mut out = String::new();
116    for b in bytes {
117        if must_encode(*b) {
118            let (c0, c1) = hex_encode(*b);
119            out.push('%');
120            out.push(c0);
121            out.push(c1);
122        } else {
123            out.push(char::from_u32(*b as u32).unwrap_or_log())
124        }
125    }
126    out
127}
128
129fn convert_port<N>(port_str: N) -> Result<u16, SplitUrlError>
130where
131    N: AsRef<str>,
132{
133    port_str
134        .as_ref()
135        .parse::<u16>()
136        .map_err(|e| SplitUrlError::new(format!("Invalid port: {}", e)))
137}
138
139///////////////////////////////////////////////////////////////////////////////
140#[derive(ThisError, Debug, Clone, Eq, PartialEq)]
141#[error("SplitUrlError: {0}")]
142pub struct SplitUrlError(String);
143
144impl SplitUrlError {
145    pub fn new<T: ToString>(message: T) -> Self {
146        SplitUrlError(message.to_string())
147    }
148}
149
150///////////////////////////////////////////////////////////////////////////////
151
/// The path portion of a URL, together with its optional fragment and query.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SplitUrlPath {
    /// Path text.
    pub path: String,
    /// Fragment text (the part introduced by `#`), if any.
    pub fragment: Option<String>,
    /// Query text (the part introduced by `?`), if any.
    pub query: Option<String>,
}

impl SplitUrlPath {
    /// Creates a `SplitUrlPath` by copying the given components into owned strings.
    pub fn new<P, F, Q>(path: P, fragment: Option<F>, query: Option<Q>) -> Self
    where
        P: AsRef<str>,
        F: AsRef<str>,
        Q: AsRef<str>,
    {
        let path = String::from(path.as_ref());
        let fragment = fragment.map(|frag| String::from(frag.as_ref()));
        let query = query.map(|qry| String::from(qry.as_ref()));
        Self {
            path,
            fragment,
            query,
        }
    }
}
173
174impl FromStr for SplitUrlPath {
175    type Err = SplitUrlError;
176    fn from_str(s: &str) -> Result<Self, Self::Err> {
177        Ok(if let Some((p, q)) = s.split_once('?') {
178            if let Some((p, f)) = p.split_once('#') {
179                SplitUrlPath::new(url_decode(p)?, Some(url_decode(f)?), Some(q))
180            } else {
181                SplitUrlPath::new(url_decode(p)?, Option::<String>::None, Some(q))
182            }
183        } else if let Some((p, f)) = s.split_once('#') {
184            SplitUrlPath::new(url_decode(p)?, Some(url_decode(f)?), Option::<String>::None)
185        } else {
186            SplitUrlPath::new(
187                url_decode(s)?,
188                Option::<String>::None,
189                Option::<String>::None,
190            )
191        })
192    }
193}
194
195impl fmt::Display for SplitUrlPath {
196    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
197        if let Some(fragment) = &self.fragment {
198            if let Some(query) = &self.query {
199                write!(
200                    f,
201                    "{}#{}?{}",
202                    url_encode(&self.path, must_encode_path),
203                    url_encode(fragment, must_encode_path),
204                    query
205                )
206            } else {
207                write!(f, "{}#{}", self.path, fragment)
208            }
209        } else if let Some(query) = &self.query {
210            write!(f, "{}?{}", url_encode(&self.path, must_encode_path), query)
211        } else {
212            write!(f, "{}", url_encode(&self.path, must_encode_path))
213        }
214    }
215}
216
217///////////////////////////////////////////////////////////////////////////////
218#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
219pub enum SplitUrlHost {
220    Hostname(String),
221    IpAddr(IpAddr),
222}
223
224impl SplitUrlHost {
225    pub fn new<S: AsRef<str>>(s: S) -> Result<Self, SplitUrlError> {
226        Self::from_str(s.as_ref())
227    }
228}
229
230impl FromStr for SplitUrlHost {
231    type Err = SplitUrlError;
232
233    fn from_str(s: &str) -> Result<Self, Self::Err> {
234        if s.is_empty() {
235            return Err(SplitUrlError::new("Host is empty"));
236        }
237        if let Ok(v4) = Ipv4Addr::from_str(s) {
238            return Ok(SplitUrlHost::IpAddr(IpAddr::V4(v4)));
239        }
240        if &s[0..1] == "[" && &s[s.len() - 1..] == "]" {
241            if let Ok(v6) = Ipv6Addr::from_str(&s[1..s.len() - 1]) {
242                return Ok(SplitUrlHost::IpAddr(IpAddr::V6(v6)));
243            }
244            return Err(SplitUrlError::new("Invalid ipv6 address"));
245        }
246        for ch in s.chars() {
247            if !matches!(ch,
248                'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '.' )
249            {
250                return Err(SplitUrlError::new("Invalid hostname"));
251            }
252        }
253        Ok(SplitUrlHost::Hostname(s.to_owned()))
254    }
255}
256impl fmt::Display for SplitUrlHost {
257    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
258        match self {
259            Self::Hostname(h) => {
260                write!(f, "{}", h)
261            }
262            Self::IpAddr(IpAddr::V4(v4)) => {
263                write!(f, "{}", v4)
264            }
265            Self::IpAddr(IpAddr::V6(v6)) => {
266                write!(f, "[{}]", v6)
267            }
268        }
269    }
270}
271
272///////////////////////////////////////////////////////////////////////////////
273
274#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
275pub struct SplitUrl {
276    pub scheme: String,
277    pub userinfo: Option<String>,
278    pub host: SplitUrlHost,
279    pub port: Option<u16>,
280    pub path: Option<SplitUrlPath>,
281}
282
283impl SplitUrl {
284    pub fn new<S>(
285        scheme: S,
286        userinfo: Option<String>,
287        host: SplitUrlHost,
288        port: Option<u16>,
289        path: Option<SplitUrlPath>,
290    ) -> Self
291    where
292        S: AsRef<str>,
293    {
294        Self {
295            scheme: scheme.as_ref().to_owned(),
296            userinfo,
297            host,
298            port,
299            path,
300        }
301    }
302
303    #[must_use]
304    pub fn host_port(&self, default_port: u16) -> String {
305        format!("{}:{}", self.host, self.port.unwrap_or(default_port))
306    }
307}
308
/// Splits `host:port...` into `(host, remainder_after_colon)`.
///
/// * For input longer than two bytes that starts with `[` (a bracketed IPv6
///   literal), the host is everything up to and including the first `]`.
///   A split is returned only when that `]` is immediately followed by `:`
///   and at least one more byte; otherwise `None`.
/// * For any other input the split happens at the first `:`; `None` if there
///   is no colon.
///
/// Never panics, including for input containing multi-byte UTF-8 characters.
fn split_host_with_port(s: &str) -> Option<(&str, &str)> {
    // special case for ipv6 colons
    if s.len() > 2 && s.starts_with('[') {
        let end = s.find(']')?;
        // Compare raw bytes rather than slicing `s[end + 1..end + 2]`, which
        // would panic if a multi-byte character follows the bracket.
        if end + 2 < s.len() && s.as_bytes()[end + 1] == b':' {
            // `]` and `:` are single-byte, so both indices are char boundaries.
            return Some((&s[..=end], &s[end + 2..]));
        }
        None
    } else {
        s.split_once(':')
    }
}
322
323impl FromStr for SplitUrl {
324    type Err = SplitUrlError;
325    fn from_str(s: &str) -> Result<Self, Self::Err> {
326        if let Some((scheme, mut rest)) = s.split_once("://") {
327            if !is_valid_scheme(scheme) {
328                return Err(SplitUrlError::new("Invalid scheme specified"));
329            }
330            let userinfo = {
331                if let Some((userinfo_str, after)) = rest.split_once('@') {
332                    rest = after;
333                    Some(url_decode(userinfo_str)?)
334                } else {
335                    None
336                }
337            };
338            if let Some((host, rest)) = split_host_with_port(rest) {
339                let host = SplitUrlHost::from_str(host)?;
340                if let Some((portstr, path)) = rest.split_once('/') {
341                    let port = convert_port(portstr)?;
342                    let path = SplitUrlPath::from_str(path)?;
343                    Ok(SplitUrl::new(
344                        scheme,
345                        userinfo,
346                        host,
347                        Some(port),
348                        Some(path),
349                    ))
350                } else {
351                    let port = convert_port(rest)?;
352                    Ok(SplitUrl::new(scheme, userinfo, host, Some(port), None))
353                }
354            } else if let Some((host, path)) = rest.split_once('/') {
355                let host = SplitUrlHost::from_str(host)?;
356                let path = SplitUrlPath::from_str(path)?;
357                Ok(SplitUrl::new(scheme, userinfo, host, None, Some(path)))
358            } else {
359                let host = SplitUrlHost::from_str(rest)?;
360                Ok(SplitUrl::new(scheme, userinfo, host, None, None))
361            }
362        } else {
363            Err(SplitUrlError::new("No scheme specified"))
364        }
365    }
366}
367
368impl fmt::Display for SplitUrl {
369    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
370        let hostname = {
371            if let Some(userinfo) = &self.userinfo {
372                let userinfo = url_encode(userinfo, must_encode_userinfo);
373                if let Some(port) = self.port {
374                    format!("{}@{}:{}", userinfo, self.host, port)
375                } else {
376                    format!("{}@{}", userinfo, self.host)
377                }
378            } else if let Some(port) = self.port {
379                format!("{}:{}", self.host, port)
380            } else {
381                format!("{}", self.host)
382            }
383        };
384        if let Some(path) = &self.path {
385            write!(f, "{}://{}/{}", self.scheme, hostname, path)
386        } else {
387            write!(f, "{}://{}", self.scheme, hostname)
388        }
389    }
390}