hyper_old_types/header/
parsing.rs

1//! Utility functions for Header implementations.
2
3use language_tags::LanguageTag;
4use std::str;
5use std::str::FromStr;
6use std::fmt::{self, Display};
7use percent_encoding;
8
9use header::Raw;
10use header::shared::Charset;
11
12
13/// Reads a single raw string when parsing a header.
14pub fn from_one_raw_str<T: str::FromStr>(raw: &Raw) -> ::Result<T> {
15    if let Some(line) = raw.one() {
16        if !line.is_empty() {
17            return from_raw_str(line)
18        }
19    }
20    Err(::Error::Header)
21}
22
23/// Reads a raw string into a value.
24pub fn from_raw_str<T: str::FromStr>(raw: &[u8]) -> ::Result<T> {
25    let s = try!(str::from_utf8(raw)).trim();
26    T::from_str(s).or(Err(::Error::Header))
27}
28
29/// Reads a comma-delimited raw header into a Vec.
30#[inline]
31pub fn from_comma_delimited<T: str::FromStr>(raw: &Raw) -> ::Result<Vec<T>> {
32    let mut result = Vec::new();
33    for s in raw {
34        let s = try!(str::from_utf8(s.as_ref()));
35        result.extend(s.split(',')
36                      .filter_map(|x| match x.trim() {
37                          "" => None,
38                          y => Some(y)
39                      })
40                      .filter_map(|x| x.trim().parse().ok()))
41    }
42    Ok(result)
43}
44
/// Writes `parts` to `f`, separating consecutive elements with `", "`.
///
/// Every element is rendered through its `Display` implementation using the
/// formatter that was passed in; an empty slice writes nothing.
pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> fmt::Result {
    for (index, part) in parts.iter().enumerate() {
        // The separator goes between elements, so skip it before the first one.
        if index != 0 {
            f.write_str(", ")?;
        }
        Display::fmt(part, f)?;
    }
    Ok(())
}
57
58/// An extended header parameter value (i.e., tagged with a character set and optionally,
59/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
60#[derive(Clone, Debug, PartialEq)]
61pub struct ExtendedValue {
62    /// The character set that is used to encode the `value` to a string.
63    pub charset: Charset,
64    /// The human language details of the `value`, if available.
65    pub language_tag: Option<LanguageTag>,
66    /// The parameter value, as expressed in octets.
67    pub value: Vec<u8>,
68}
69
70/// Parses extended header parameter values (`ext-value`), as defined in
71/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
72///
73/// Extended values are denoted by parameter names that end with `*`.
74///
75/// ## ABNF
76///
77/// ```text
78/// ext-value     = charset  "'" [ language ] "'" value-chars
79///               ; like RFC 2231's <extended-initial-value>
80///               ; (see [RFC2231], Section 7)
81///
82/// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
83///
84/// mime-charset  = 1*mime-charsetc
85/// mime-charsetc = ALPHA / DIGIT
86///               / "!" / "#" / "$" / "%" / "&"
87///               / "+" / "-" / "^" / "_" / "`"
88///               / "{" / "}" / "~"
89///               ; as <mime-charset> in Section 2.3 of [RFC2978]
90///               ; except that the single quote is not included
91///               ; SHOULD be registered in the IANA charset registry
92///
93/// language      = <Language-Tag, defined in [RFC5646], Section 2.1>
94///
95/// value-chars   = *( pct-encoded / attr-char )
96///
97/// pct-encoded   = "%" HEXDIG HEXDIG
98///               ; see [RFC3986], Section 2.1
99///
100/// attr-char     = ALPHA / DIGIT
101///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
102///               / "^" / "_" / "`" / "|" / "~"
103///               ; token except ( "*" / "'" / "%" )
104/// ```
105pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> {
106
107    // Break into three pieces separated by the single-quote character
108    let mut parts = val.splitn(3,'\'');
109
110    // Interpret the first piece as a Charset
111    let charset: Charset = match parts.next() {
112        None => return Err(::Error::Header),
113        Some(n) => try!(FromStr::from_str(n)),
114    };
115
116    // Interpret the second piece as a language tag
117    let lang: Option<LanguageTag> = match parts.next() {
118        None => return Err(::Error::Header),
119        Some("") => None,
120        Some(s) => match s.parse() {
121            Ok(lt) => Some(lt),
122            Err(_) => return Err(::Error::Header),
123        }
124    };
125
126    // Interpret the third piece as a sequence of value characters
127    let value: Vec<u8> = match parts.next() {
128        None => return Err(::Error::Header),
129        Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(),
130    };
131
132    Ok(ExtendedValue {
133        charset: charset,
134        language_tag: lang,
135        value: value,
136    })
137}
138
139
140impl Display for ExtendedValue {
141    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142        let encoded_value =
143            percent_encoding::percent_encode(&self.value[..], self::percent_encoding_http::HTTP_VALUE);
144        if let Some(ref lang) = self.language_tag {
145            write!(f, "{}'{}'{}", self.charset, lang, encoded_value)
146        } else {
147            write!(f, "{}''{}", self.charset, encoded_value)
148        }
149    }
150}
151
152/// Percent encode a sequence of bytes with a character set defined in
153/// [https://tools.ietf.org/html/rfc5987#section-3.2][url]
154///
155/// [url]: https://tools.ietf.org/html/rfc5987#section-3.2
156pub fn http_percent_encode(f: &mut fmt::Formatter, bytes: &[u8]) -> fmt::Result {
157    let encoded = percent_encoding::percent_encode(bytes, self::percent_encoding_http::HTTP_VALUE);
158    fmt::Display::fmt(&encoded, f)
159}
160
161mod percent_encoding_http {
162    use percent_encoding;
163
164    // internal module because macro is hard-coded to make a public item
165    // but we don't want to public export this item
166    define_encode_set! {
167        // This encode set is used for HTTP header values and is defined at
168        // https://tools.ietf.org/html/rfc5987#section-3.2
169        pub HTTP_VALUE = [percent_encoding::SIMPLE_ENCODE_SET] | {
170            ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?',
171            '[', '\\', ']', '{', '}'
172        }
173    }
174}
175
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::{ExtendedValue, parse_extended_value};
    use language_tags::LanguageTag;

    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode character U+00A3 (POUND SIGN)
        let expected_language_tag: LanguageTag = "en".parse().unwrap();
        let ExtendedValue { charset, language_tag, value } =
            parse_extended_value("iso-8859-1'en'%A3%20rates").unwrap();
        assert_eq!(Charset::Iso_8859_1, charset);
        assert_eq!(Some(expected_language_tag), language_tag);
        assert_eq!(b"\xA3 rates".to_vec(), value);
    }

    #[test]
    fn test_parse_extended_value_with_encoding() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode characters U+00A3 (POUND SIGN)
        // and U+20AC (EURO SIGN)
        let ExtendedValue { charset, language_tag, value } =
            parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates").unwrap();
        assert_eq!(Charset::Ext(String::from("UTF-8")), charset);
        assert_eq!(None, language_tag);
        assert_eq!(b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(), value);
    }

    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted() {
        // Only one quote: the value piece is missing.
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        // A trailing quote yields an empty second piece but still no third one.
        assert!(parse_extended_value("blank second part'").is_err());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding_and_language_tag() {
        let extended_value = ExtendedValue {
            charset: Charset::Iso_8859_1,
            language_tag: Some("en".parse().expect("Could not parse language tag")),
            value: b"\xA3 rates".to_vec(),
        };
        assert_eq!("ISO-8859-1'en'%A3%20rates", extended_value.to_string());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding() {
        let extended_value = ExtendedValue {
            charset: Charset::Ext(String::from("UTF-8")),
            language_tag: None,
            value: b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(),
        };
        assert_eq!("UTF-8''%C2%A3%20and%20%E2%82%AC%20rates",
                   extended_value.to_string());
    }
}
249}