no_std_http/uri/
scheme.rs

1use core::convert::TryFrom;
2use core::fmt;
3use core::hash::{Hash, Hasher};
4use core::str::FromStr;
5
6use alloc::boxed::Box;
7use bytes::Bytes;
8
9use super::{ErrorKind, InvalidUri};
10use crate::byte_str::ByteStr;
11
12/// Represents the scheme component of a URI
13#[derive(Clone)]
14pub struct Scheme {
15    pub(super) inner: SchemeInner,
16}
17
18#[derive(Clone, Debug)]
19pub(super) enum SchemeInner<T = Box<ByteStr>> {
20    None,
21    Standard(Protocol),
22    Other(T),
23}
24
25#[derive(Copy, Clone, Debug)]
26pub(super) enum Protocol {
27    Http,
28    Https,
29}
30
31impl Scheme {
32    /// HTTP protocol scheme
33    pub const HTTP: Scheme = Scheme {
34        inner: SchemeInner::Standard(Protocol::Http),
35    };
36
37    /// HTTP protocol over TLS.
38    pub const HTTPS: Scheme = Scheme {
39        inner: SchemeInner::Standard(Protocol::Https),
40    };
41
42    pub(super) fn empty() -> Self {
43        Scheme {
44            inner: SchemeInner::None,
45        }
46    }
47
48    /// Return a str representation of the scheme
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// # use http::uri::*;
54    /// let scheme: Scheme = "http".parse().unwrap();
55    /// assert_eq!(scheme.as_str(), "http");
56    /// ```
57    #[inline]
58    pub fn as_str(&self) -> &str {
59        use self::Protocol::*;
60        use self::SchemeInner::*;
61
62        match self.inner {
63            Standard(Http) => "http",
64            Standard(Https) => "https",
65            Other(ref v) => &v[..],
66            None => unreachable!(),
67        }
68    }
69}
70
71impl<'a> TryFrom<&'a [u8]> for Scheme {
72    type Error = InvalidUri;
73    #[inline]
74    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
75        use self::SchemeInner::*;
76
77        match SchemeInner::parse_exact(s)? {
78            None => Err(ErrorKind::InvalidScheme.into()),
79            Standard(p) => Ok(Standard(p).into()),
80            Other(_) => {
81                let bytes = Bytes::copy_from_slice(s);
82
83                // Safety: postcondition on parse_exact() means that s and
84                // hence bytes are valid UTF-8.
85                let string = unsafe { ByteStr::from_utf8_unchecked(bytes) };
86
87                Ok(Other(Box::new(string)).into())
88            }
89        }
90    }
91}
92
93impl<'a> TryFrom<&'a str> for Scheme {
94    type Error = InvalidUri;
95    #[inline]
96    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
97        TryFrom::try_from(s.as_bytes())
98    }
99}
100
101impl FromStr for Scheme {
102    type Err = InvalidUri;
103
104    fn from_str(s: &str) -> Result<Self, Self::Err> {
105        TryFrom::try_from(s)
106    }
107}
108
109impl fmt::Debug for Scheme {
110    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
111        fmt::Debug::fmt(self.as_str(), f)
112    }
113}
114
115impl fmt::Display for Scheme {
116    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
117        f.write_str(self.as_str())
118    }
119}
120
121impl AsRef<str> for Scheme {
122    #[inline]
123    fn as_ref(&self) -> &str {
124        self.as_str()
125    }
126}
127
128impl PartialEq for Scheme {
129    fn eq(&self, other: &Scheme) -> bool {
130        use self::Protocol::*;
131        use self::SchemeInner::*;
132
133        match (&self.inner, &other.inner) {
134            (&Standard(Http), &Standard(Http)) => true,
135            (&Standard(Https), &Standard(Https)) => true,
136            (Other(a), Other(b)) => a.eq_ignore_ascii_case(b),
137            (&None, _) | (_, &None) => unreachable!(),
138            _ => false,
139        }
140    }
141}
142
143impl Eq for Scheme {}
144
145/// Case-insensitive equality
146///
147/// # Examples
148///
149/// ```
150/// # use http::uri::Scheme;
151/// let scheme: Scheme = "HTTP".parse().unwrap();
152/// assert_eq!(scheme, *"http");
153/// ```
154impl PartialEq<str> for Scheme {
155    fn eq(&self, other: &str) -> bool {
156        self.as_str().eq_ignore_ascii_case(other)
157    }
158}
159
160/// Case-insensitive equality
161impl PartialEq<Scheme> for str {
162    fn eq(&self, other: &Scheme) -> bool {
163        other == self
164    }
165}
166
167/// Case-insensitive hashing
168impl Hash for Scheme {
169    fn hash<H>(&self, state: &mut H)
170    where
171        H: Hasher,
172    {
173        match self.inner {
174            SchemeInner::None => (),
175            SchemeInner::Standard(Protocol::Http) => state.write_u8(1),
176            SchemeInner::Standard(Protocol::Https) => state.write_u8(2),
177            SchemeInner::Other(ref other) => {
178                other.len().hash(state);
179                for &b in other.as_bytes() {
180                    state.write_u8(b.to_ascii_lowercase());
181                }
182            }
183        }
184    }
185}
186
187impl<T> SchemeInner<T> {
188    pub(super) fn is_none(&self) -> bool {
189        matches!(*self, SchemeInner::None)
190    }
191}
192
// Upper bound, in bytes, on the length of a scheme. Longer schemes are
// rejected by the parsers below with `ErrorKind::SchemeTooLong`; keeping the
// bound small leaves room for further optimizations later.
const MAX_SCHEME_LEN: usize = 64;
196
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// SCHEME_CHARS is a lookup table, indexed by byte value, of the characters
// that may appear in the scheme part of a URI. The entry for an invalid
// character is 0. The entry for a valid character is the character itself
// (i.e. the entry for 43 is b'+' because b'+' == 43u8).
//
// b':' is not a scheme character; it is present in the table only as a
// sentinel so that the parsers can detect the end of the scheme with the same
// lookup.
//
// b'~' is deliberately NOT accepted: it is absent from the grammar above.
//
// An important characteristic of this table is that all entries above 127 are
// invalid. Every valid entry is therefore a single-byte UTF-8 code point, so
// a slice made up only of valid entries is valid UTF-8.
#[rustfmt::skip]
const SCHEME_CHARS: [u8; 256] = [
    //  0      1      2      3      4      5      6      7      8      9
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //   x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  1x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  2x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, //  3x
        0,     0,     0,  b'+',     0,  b'-',  b'.',     0,  b'0',  b'1', //  4x
     b'2',  b'3',  b'4',  b'5',  b'6',  b'7',  b'8',  b'9',  b':',     0, //  5x
        0,     0,     0,     0,     0,  b'A',  b'B',  b'C',  b'D',  b'E', //  6x
     b'F',  b'G',  b'H',  b'I',  b'J',  b'K',  b'L',  b'M',  b'N',  b'O', //  7x
     b'P',  b'Q',  b'R',  b'S',  b'T',  b'U',  b'V',  b'W',  b'X',  b'Y', //  8x
     b'Z',     0,     0,     0,     0,     0,     0,  b'a',  b'b',  b'c', //  9x
     b'd',  b'e',  b'f',  b'g',  b'h',  b'i',  b'j',  b'k',  b'l',  b'm', // 10x
     b'n',  b'o',  b'p',  b'q',  b'r',  b's',  b't',  b'u',  b'v',  b'w', // 11x
     b'x',  b'y',  b'z',     0,     0,     0,     0,     0,     0,     0, // 12x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 13x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 14x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 15x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 16x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 17x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 18x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 19x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 20x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 21x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 22x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 23x
        0,     0,     0,     0,     0,     0,     0,     0,     0,     0, // 24x
        0,     0,     0,     0,     0,     0                              // 25x
];
235
236impl SchemeInner<usize> {
237    // Postcondition: On all Ok() returns, s is valid UTF-8
238    fn parse_exact(s: &[u8]) -> Result<SchemeInner<()>, InvalidUri> {
239        match s {
240            b"http" => Ok(Protocol::Http.into()),
241            b"https" => Ok(Protocol::Https.into()),
242            _ => {
243                if s.len() > MAX_SCHEME_LEN {
244                    return Err(ErrorKind::SchemeTooLong.into());
245                }
246
247                // check that each byte in s is a SCHEME_CHARS which implies
248                // that it is a valid single byte UTF-8 code point.
249                for &b in s {
250                    match SCHEME_CHARS[b as usize] {
251                        b':' => {
252                            // Don't want :// here
253                            return Err(ErrorKind::InvalidScheme.into());
254                        }
255                        0 => {
256                            return Err(ErrorKind::InvalidScheme.into());
257                        }
258                        _ => {}
259                    }
260                }
261
262                Ok(SchemeInner::Other(()))
263            }
264        }
265    }
266
267    pub(super) fn parse(s: &[u8]) -> Result<SchemeInner<usize>, InvalidUri> {
268        if s.len() >= 7 {
269            // Check for HTTP
270            if s[..7].eq_ignore_ascii_case(b"http://") {
271                // Prefix will be striped
272                return Ok(Protocol::Http.into());
273            }
274        }
275
276        if s.len() >= 8 {
277            // Check for HTTPs
278            if s[..8].eq_ignore_ascii_case(b"https://") {
279                return Ok(Protocol::Https.into());
280            }
281        }
282
283        if s.len() > 3 {
284            for i in 0..s.len() {
285                let b = s[i];
286
287                match SCHEME_CHARS[b as usize] {
288                    b':' => {
289                        // Not enough data remaining
290                        if s.len() < i + 3 {
291                            break;
292                        }
293
294                        // Not a scheme
295                        if &s[i + 1..i + 3] != b"//" {
296                            break;
297                        }
298
299                        if i > MAX_SCHEME_LEN {
300                            return Err(ErrorKind::SchemeTooLong.into());
301                        }
302
303                        // Return scheme
304                        return Ok(SchemeInner::Other(i));
305                    }
306                    // Invalid scheme character, abort
307                    0 => break,
308                    _ => {}
309                }
310            }
311        }
312
313        Ok(SchemeInner::None)
314    }
315}
316
317impl Protocol {
318    pub(super) fn len(&self) -> usize {
319        match *self {
320            Protocol::Http => 4,
321            Protocol::Https => 5,
322        }
323    }
324}
325
326impl<T> From<Protocol> for SchemeInner<T> {
327    fn from(src: Protocol) -> Self {
328        SchemeInner::Standard(src)
329    }
330}
331
332#[doc(hidden)]
333impl From<SchemeInner> for Scheme {
334    fn from(src: SchemeInner) -> Self {
335        Scheme { inner: src }
336    }
337}
338
#[cfg(test)]
mod test {
    use super::*;

    /// Parses `s` as a `Scheme`, panicking with a descriptive message when
    /// the text is rejected.
    fn scheme(s: &str) -> Scheme {
        match s.parse::<Scheme>() {
            Ok(parsed) => parsed,
            Err(_) => panic!("Invalid scheme: {}", s),
        }
    }

    #[test]
    fn scheme_eq_to_str() {
        let cases = ["http", "https", "ftp", "my+funky+scheme"];

        for &text in cases.iter() {
            assert_eq!(&scheme(text), text);
        }
    }

    #[test]
    fn invalid_scheme_is_error() {
        // '_' is not a scheme character.
        Scheme::try_from("my_funky_scheme").expect_err("Unexpectedly valid Scheme");

        // Invalid UTF-8
        let invalid_utf8: &[u8] = &[0xC0];
        Scheme::try_from(invalid_utf8).expect_err("Unexpectedly valid Scheme");
    }
}