uhttp_media_type/
lib.rs

1//! This crate provides a zero-allocation, slice-based parser for [HTTP Media
2//! Types](https://tools.ietf.org/html/rfc7231#section-3.1.1) as they appear in
3//! `Content-Type` and `Accept` headers.
4//!
5//! ## Example
6//!
7//! ```rust
8//! use uhttp_media_type::{MediaType, MediaParams, ParamValue};
9//!
10//! let mt = MediaType::new("application/json; charset=utf-8; param=\"a value\"").unwrap();
11//! assert_eq!(mt.mimetype, "application/json");
12//! assert_eq!(mt.parts().unwrap(), ("application", "json"));
13//! assert_eq!(mt.params, " charset=utf-8; param=\"a value\"");
14//!
15//! let mut params = MediaParams::new(mt.params);
16//!
17//! let (key, val) = params.next().unwrap().unwrap();
18//! assert_eq!(key, "charset");
19//! assert_eq!(val, ParamValue::Unquoted("utf-8"));
20//! assert_eq!(val.inner(), "utf-8");
21//!
22//! let (key, val) = params.next().unwrap().unwrap();
23//! assert_eq!(key, "param");
24//! assert_eq!(val, ParamValue::Quoted("a value"));
25//! assert_eq!(val.inner(), "a value");
26//!
27//! assert!(params.next().is_none());
28//! ```
29
30#![feature(field_init_shorthand)]
31
32extern crate memchr;
33
34use memchr::memchr;
35
/// A media type field split into its MIME type and its raw parameter string.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct MediaType<'a> {
    /// The MIME type, which is expected to look like `type/subtype`.
    ///
    /// A value produced by `MediaType::new` is never empty and never has leading or
    /// trailing whitespace, but its syntax is otherwise unchecked. Compare it to other
    /// strings case-insensitively [RFC7231§3.1.1.1].
    pub mimetype: &'a str,

    /// The unparsed parameter text that followed the MIME type.
    ///
    /// `MediaType::new` stores everything after the first `;` here verbatim, or the
    /// empty string when the field contained no `;`.
    pub params: &'a str,
}
49
50impl<'a> MediaType<'a> {
51    /// Try to parse a `MediaType` from the given string.
52    pub fn new(s: &'a str) -> Result<Self, ()> {
53        // Split on the ';' that begins parameters, or use the whole string.
54        let (mimetype, params) = match memchr(b';', s.as_bytes()) {
55            Some(idx) => {
56                let (l, r) = s.split_at(idx);
57                (l, &r[1..])
58            },
59            None => (s, &""[..]),
60        };
61
62        // Mimetype may have surrounding whitespace [RFC7231§3.1.1.1].
63        let mimetype = mimetype.trim();
64
65        // Mimetype should be nonempty [RFC7231§3.1.1.1].
66        if mimetype.is_empty() {
67            return Err(());
68        }
69
70        Ok(MediaType { mimetype, params })
71    }
72
73    /// Try to retrieve the type and subtype, in that order, of the current MIME type.
74    ///
75    /// Each part may be empty or contain whitespace and requires case-insensitive
76    /// comparison to other strings.
77    pub fn parts(&self) -> Result<(&'a str, &'a str), ()> {
78        let mut parts = self.mimetype.split('/');
79
80        let main = parts.next().ok_or(())?;
81        let sub = parts.next().ok_or(())?;
82
83        if parts.next().is_none() {
84            Ok((main, sub))
85        } else {
86            Err(())
87        }
88    }
89}
90
/// Iterator that walks the `key=value` pairs of a media type parameter string.
///
/// Every successful step produces a `(key, value)` pair. The key has no surrounding
/// whitespace but may still contain whitespace internally, and it must be compared to
/// other strings case-insensitively. The value carries the guarantees documented on
/// `ParamValue`; whether it needs case-insensitive comparison depends on the parameter.
///
/// Iteration ends once the remaining text contains no `=`.
#[derive(Clone, Copy, Hash, Debug)]
pub struct MediaParams<'a>(&'a str);

impl<'a> MediaParams<'a> {
    /// Build an iterator over `s`, the raw parameter text of a media type (for example
    /// the `params` field of a `MediaType`).
    pub fn new(s: &'a str) -> Self {
        MediaParams(s)
    }
}
107
108impl<'a> Iterator for MediaParams<'a> {
109    type Item = Result<(&'a str, ParamValue<'a>), ()>;
110
111    fn next(&mut self) -> Option<Self::Item> {
112        let (key, rest) = match memchr(b'=', self.0.as_bytes()) {
113            Some(idx) => self.0.split_at(idx),
114            None => return None,
115        };
116
117        // Key may have leading whitespace [RFC7231§3.1.1.1]. This may violate the syntax
118        // requirement of no whitespace around the '=' separator, but the requirement
119        // doesn't seem necessary to the rest of the grammar, isn't given any rationale,
120        // and doesn't seem worth the complexity to check it.
121        let key = key.trim();
122
123        let (val, rest) = match ParamValue::new(&rest[1..]) {
124            Ok(v) => v,
125            Err(()) => return Some(Err(())),
126        };
127
128        self.0 = rest;
129
130        Some(Ok((key, val)))
131    }
132}
133
/// The value half of a media type parameter.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ParamValue<'a> {
    /// A bare value written without quotes.
    ///
    /// The contained string is never empty and has no leading or trailing whitespace,
    /// though whitespace may still appear inside it.
    Unquoted(&'a str),
    /// A value that was written between double quotes.
    ///
    /// The contained string is the text between the quotes, taken verbatim: it may be
    /// empty, may have whitespace anywhere (including at either end), and may contain
    /// backslash-escaped characters that still need to be processed.
    Quoted(&'a str),
}
149
150impl<'a> ParamValue<'a> {
151    fn new(s: &'a str) -> Result<(Self, &'a str), ()> {
152        if s.is_empty() {
153            return Err(());
154        }
155
156        if s.starts_with('"') {
157            let len = find_end_quote(s.as_bytes()).ok_or(())?;
158
159            // Skip over beginning quote and extract value.
160            let (val, rest) = (&s[1..]).split_at(len);
161            // Skip over ending quote.
162            let rest = &rest[1..];
163
164            // Extract any text between ending quote and semicolon.
165            let (leftover, rest) = match memchr(b';', rest.as_bytes()) {
166                Some(idx) => {
167                    let (l, r) = rest.split_at(idx);
168                    (l, &r[1..])
169                },
170                None => (rest, &""[..]),
171            };
172
173            // Verify that text between ending quote and semicolon contains only
174            // whitespace.
175            if !leftover.trim().is_empty() {
176                return Err(());
177            }
178
179            Ok((ParamValue::Quoted(val), rest))
180        } else {
181            let (val, rest) = match memchr(b';', s.as_bytes()) {
182                Some(idx) => {
183                    let (l, r) = s.split_at(idx);
184                    (l, &r[1..])
185                },
186                None => (s, &""[..]),
187            };
188
189            // Value may have surrounding whitespace [RFC7231§3.1.1.1].
190            let val = val.trim();
191
192            // Unquoted value must be nonempty [RFC7231§3.1.1.1].
193            if val.is_empty() {
194                return Err(());
195            }
196
197            Ok((ParamValue::Unquoted(val), rest))
198        }
199    }
200
201    /// Retrieve the inner text value of the parameter.
202    pub fn inner(&self) -> &'a str {
203        match *self {
204            ParamValue::Unquoted(s) => s,
205            ParamValue::Quoted(s) => s,
206        }
207    }
208}
209
210/// Find the terminating quote in the given string, skipping backslash-escaped quotes, and
211/// return the number of bytes within the quotes.
212fn find_end_quote(s: &[u8]) -> Option<usize> {
213    debug_assert!(s[0] == b'"');
214
215    // Start after the beginning quote.
216    let start = match s.get(1..) {
217        Some(x) => x,
218        None => return None,
219    };
220
221    // Current slice being searched for quotes.
222    let mut cur = start;
223    // Current length of text (bytes) within quotes.
224    let mut len = 0;
225
226    loop {
227        // Find the next quote.
228        let idx = match memchr(b'"', cur) {
229            Some(idx) => idx,
230            None => return None,
231        };
232
233        // Include everything up to the quote.
234        len += idx;
235
236        let (text, rest) = cur.split_at(idx);
237
238        if text.is_empty() || escape_count(text) % 2 == 0 {
239            break;
240        }
241
242        // Include the escaped quote.
243        len += 1;
244
245        // Try to move past the quote.
246        cur = match rest.get(1..) {
247            Some(x) => x,
248            None => return None,
249        };
250    }
251
252    Some(len)
253}
254
/// Return the length of the unbroken run of backslashes that ends the given slice.
fn escape_count(s: &[u8]) -> usize {
    // Everything after the last non-backslash byte is the trailing run; if there is no
    // such byte the entire slice is backslashes.
    match s.iter().rposition(|&b| b != b'\\') {
        Some(last_other) => s.len() - 1 - last_other,
        None => s.len(),
    }
}
260
#[cfg(test)]
mod test {
    use super::*;
    use super::ParamValue::{Quoted, Unquoted};

    /// Check that iterating the parameter string `raw` yields exactly the `expected`
    /// key/value pairs, in order, and then ends.
    fn assert_params(raw: &str, expected: &[(&str, ParamValue)]) {
        let mut params = MediaParams::new(raw);

        for &(key, val) in expected {
            assert_eq!(params.next(), Some(Ok((key, val))), "params: {:?}", raw);
        }

        assert!(params.next().is_none(), "params: {:?}", raw);
    }

    /// Check both the MIME type and the parsed parameters of a full media type field.
    fn assert_media_type_params(input: &str, mimetype: &str, expected: &[(&str, ParamValue)]) {
        let m = MediaType::new(input).unwrap();
        assert_eq!(m.mimetype, mimetype, "input: {:?}", input);
        assert_params(m.params, expected);
    }

    /// Splitting of a field into MIME type, raw parameters, type and subtype.
    #[test]
    fn test_media_type() {
        // (input, mimetype, params, type, subtype)
        let cases = [
            ("text/html;charset=utf-8", "text/html", "charset=utf-8", "text", "html"),
            ("text/*;charset=utf-8", "text/*", "charset=utf-8", "text", "*"),
            ("image/*", "image/*", "", "image", "*"),
            ("text/json", "text/json", "", "text", "json"),
            ("text/json ;", "text/json", "", "text", "json"),
            ("text/json ;    ", "text/json", "    ", "text", "json"),
            ("text/html; charset=\"utf-8\"", "text/html", " charset=\"utf-8\"", "text", "html"),
            (
                "\t\t    text/html  \t; charset=utf-8    \t\t",
                "text/html",
                " charset=utf-8    \t\t",
                "text",
                "html",
            ),
            (" text  /\t*  \t; charset=utf-8", "text  /\t*", " charset=utf-8", "text  ", "\t*"),
            ("text/hello space", "text/hello space", "", "text", "hello space"),
            ("image/", "image/", "", "image", ""),
            ("image/    ", "image/", "", "image", ""),
            ("/json", "/json", "", "", "json"),
            ("   /json", "/json", "", "", "json"),
            ("/", "/", "", "", ""),
            ("\t\t /   ", "/", "", "", ""),
        ];

        for &(input, mimetype, params, main, sub) in cases.iter() {
            let m = MediaType::new(input).unwrap();
            assert_eq!(m.mimetype, mimetype, "input: {:?}", input);
            assert_eq!(m.params, params, "input: {:?}", input);
            assert_eq!(m.parts(), Ok((main, sub)), "input: {:?}", input);
        }

        // A MIME type that is empty after trimming is rejected outright.
        for &input in ["", "   \t", "   \t; charet=utf8"].iter() {
            assert!(MediaType::new(input).is_err(), "input: {:?}", input);
        }

        // A MIME type without a '/' parses, but cannot be split into parts.
        let m = MediaType::new("text ; charset=utf8").unwrap();
        assert_eq!(m.mimetype, "text");
        assert_eq!(m.params, " charset=utf8");
        assert!(m.parts().is_err());

        // More than one '/' cannot be split into parts either.
        let m = MediaType::new("a/b/c").unwrap();
        assert_eq!(m.mimetype, "a/b/c");
        assert!(m.parts().is_err());
    }

    /// Only the backslashes at the very end of the slice are counted.
    #[test]
    fn test_escape_count() {
        let cases = [
            (&br"\"[..], 1),
            (&br"\\"[..], 2),
            (&br"\\\"[..], 3),
            (&br"\\\\"[..], 4),
            (&br"42 \\\\"[..], 4),
            (&br"\\ 42 \\\\"[..], 4),
            (&br"\\ \\\\"[..], 4),
            (&br"\\a\\\\"[..], 4),
        ];

        for &(input, count) in cases.iter() {
            assert_eq!(escape_count(input), count, "input: {:?}", input);
        }
    }

    /// The reported length must equal the length of the text between the quotes.
    #[test]
    fn test_find_end_quote() {
        // (input, text expected between the quotes, or None when unterminated)
        let cases: [(&[u8], Option<&[u8]>); 9] = [
            (&b"\""[..], None),
            (&b"\"\""[..], Some(&b""[..])),
            (&b"\"utf-8\""[..], Some(&b"utf-8"[..])),
            (&b"\"'utf-8'\""[..], Some(&b"'utf-8'"[..])),
            (&b"\"utf-8\"; key=value"[..], Some(&b"utf-8"[..])),
            (
                &b"\"hello \\\"world\\\" 1337\""[..],
                Some(&b"hello \\\"world\\\" 1337"[..]),
            ),
            (
                &b"\"abcd fghi\\\" jklm \"; nopq"[..],
                Some(&b"abcd fghi\\\" jklm "[..]),
            ),
            (&b"\"utf-8; key=value"[..], None),
            (&b"\"utf-8\\\"; key=value"[..], None),
        ];

        for &(input, inner) in cases.iter() {
            assert_eq!(find_end_quote(input), inner.map(|text| text.len()), "input: {:?}", input);
        }
    }

    /// Parsing of a single value plus the remainder handed back to the caller.
    #[test]
    fn test_param_value() {
        let rejected = [
            "",
            "  \t",
            "  \t;charset=utf8",
            "\"",
            "\"utf-8\\\"\" wrong; key=value",
        ];

        for &input in rejected.iter() {
            assert_eq!(ParamValue::new(input), Err(()), "input: {:?}", input);
        }

        // (input, parsed value, remainder after the terminating semicolon)
        let accepted = [
            ("\"\"", Quoted(""), ""),
            ("\"utf-8\"", Quoted("utf-8"), ""),
            ("\"utf-8, \\\" wat\"; key=value", Quoted("utf-8, \\\" wat"), " key=value"),
            ("\"utf-8; other\"; key=value", Quoted("utf-8; other"), " key=value"),
            ("\"utf-8\\\"\"; key=value", Quoted("utf-8\\\""), " key=value"),
            ("\"utf-8\\\"\" \t\t ; key=value", Quoted("utf-8\\\""), " key=value"),
            ("utf-8; key=value", Unquoted("utf-8"), " key=value"),
            ("some-value   ", Unquoted("some-value"), ""),
            ("utf-8 abc; key=value", Unquoted("utf-8 abc"), " key=value"),
        ];

        for &(input, value, rest) in accepted.iter() {
            assert_eq!(ParamValue::new(input), Ok((value, rest)), "input: {:?}", input);
        }
    }

    /// Iteration over raw parameter strings.
    #[test]
    fn test_media_params() {
        assert_params("charset=utf-8", &[("charset", Unquoted("utf-8"))]);

        assert_params(" charset=\"utf-8\"", &[("charset", Quoted("utf-8"))]);

        assert_params(
            "  \tcharset=utf-8; chars=\"utf-42; wat\";key=\"some \\\"value\\\"\";   k=v  \t\t",
            &[
                ("charset", Unquoted("utf-8")),
                ("chars", Quoted("utf-42; wat")),
                ("key", Quoted("some \\\"value\\\"")),
                ("k", Unquoted("v")),
            ],
        );

        // A parameter whose value is missing is reported as an error.
        let mut p = MediaParams::new("charset=");
        assert_eq!(p.next(), Some(Err(())));
    }

    /// End-to-end: parse a field, then iterate the parameters it exposes.
    #[test]
    fn test_media_type_params() {
        assert_media_type_params(
            "text/html;charset=utf-8",
            "text/html",
            &[("charset", Unquoted("utf-8"))],
        );

        assert_media_type_params(
            "text/*;charset=utf-8",
            "text/*",
            &[("charset", Unquoted("utf-8"))],
        );

        assert_media_type_params(
            "text/html; charset=\"utf-8\"",
            "text/html",
            &[("charset", Quoted("utf-8"))],
        );

        assert_media_type_params(
            "text/json; charset=\"utf-8\"; key=val  \t",
            "text/json",
            &[("charset", Quoted("utf-8")), ("key", Unquoted("val"))],
        );

        assert_media_type_params("text/json", "text/json", &[]);

        assert_media_type_params("text/json ;", "text/json", &[]);
    }
}