hyper_sync/header/
parsing.rs

1//! Utility functions for Header implementations.
2
3use language_tags::LanguageTag;
4use percent_encoding;
5use std::str;
6use std::str::FromStr;
7use std::fmt::{self, Display};
8
9use header::Raw;
10use header::shared::Charset;
11
12/// Reads a single raw string when parsing a header.
13pub fn from_one_raw_str<T: str::FromStr>(raw: &Raw) -> ::Result<T> {
14    if let Some(line) = raw.one() {
15        if !line.is_empty() {
16            return from_raw_str(line)
17        }
18    }
19    Err(::Error::Header)
20}
21
22/// Reads a raw string into a value.
23pub fn from_raw_str<T: str::FromStr>(raw: &[u8]) -> ::Result<T> {
24    let s = try!(str::from_utf8(raw)).trim();
25    T::from_str(s).or(Err(::Error::Header))
26}
27
28/// Reads a comma-delimited raw header into a Vec.
29#[inline]
30pub fn from_comma_delimited<T: str::FromStr>(raw: &Raw) -> ::Result<Vec<T>> {
31    let mut result = Vec::new();
32    for s in raw {
33        let s = try!(str::from_utf8(s.as_ref()));
34        result.extend(s.split(',')
35                      .filter_map(|x| match x.trim() {
36                          "" => None,
37                          y => Some(y)
38                      })
39                      .filter_map(|x| x.trim().parse().ok()))
40    }
41    Ok(result)
42}
43
/// Writes `parts` to `f`, separating consecutive items with `", "`.
///
/// Nothing is written for an empty slice. Each item is rendered through
/// its `Display` implementation using the formatter `f` itself.
pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> fmt::Result {
    for (index, part) in parts.iter().enumerate() {
        // The separator goes between items, never before the first one.
        if index > 0 {
            f.write_str(", ")?;
        }
        Display::fmt(part, f)?;
    }
    Ok(())
}
56
/// An extended header parameter value (i.e., one tagged with a character set and, optionally,
/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
///
/// `parse_extended_value` builds this type from the `charset'language'value-chars` wire form,
/// and the `Display` implementation writes that form back out.
#[derive(Clone, Debug, PartialEq)]
pub struct ExtendedValue {
    /// The character set in which the octets of `value` are encoded.
    pub charset: Charset,
    /// The human language of the `value`; `None` when the language part of the wire form
    /// is empty.
    pub language_tag: Option<LanguageTag>,
    /// The parameter value, as raw octets. `parse_extended_value` stores them already
    /// percent-decoded, and `Display` percent-encodes them again on output.
    pub value: Vec<u8>,
}
68
69/// Parses extended header parameter values (`ext-value`), as defined in
70/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
71///
72/// Extended values are denoted by parameter names that end with `*`.
73///
74/// ## ABNF
75///
76/// ```text
77/// ext-value     = charset  "'" [ language ] "'" value-chars
78///               ; like RFC 2231's <extended-initial-value>
79///               ; (see [RFC2231], Section 7)
80///
81/// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
82///
83/// mime-charset  = 1*mime-charsetc
84/// mime-charsetc = ALPHA / DIGIT
85///               / "!" / "#" / "$" / "%" / "&"
86///               / "+" / "-" / "^" / "_" / "`"
87///               / "{" / "}" / "~"
88///               ; as <mime-charset> in Section 2.3 of [RFC2978]
89///               ; except that the single quote is not included
90///               ; SHOULD be registered in the IANA charset registry
91///
92/// language      = <Language-Tag, defined in [RFC5646], Section 2.1>
93///
94/// value-chars   = *( pct-encoded / attr-char )
95///
96/// pct-encoded   = "%" HEXDIG HEXDIG
97///               ; see [RFC3986], Section 2.1
98///
99/// attr-char     = ALPHA / DIGIT
100///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
101///               / "^" / "_" / "`" / "|" / "~"
102///               ; token except ( "*" / "'" / "%" )
103/// ```
104pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> {
105
106    // Break into three pieces separated by the single-quote character
107    let mut parts = val.splitn(3,'\'');
108
109    // Interpret the first piece as a Charset
110    let charset: Charset = match parts.next() {
111        None => return Err(::Error::Header),
112        Some(n) => try!(FromStr::from_str(n)),
113    };
114
115    // Interpret the second piece as a language tag
116    let lang: Option<LanguageTag> = match parts.next() {
117        None => return Err(::Error::Header),
118        Some("") => None,
119        Some(s) => match s.parse() {
120            Ok(lt) => Some(lt),
121            Err(_) => return Err(::Error::Header),
122        }
123    };
124
125    // Interpret the third piece as a sequence of value characters
126    let value: Vec<u8> = match parts.next() {
127        None => return Err(::Error::Header),
128        Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(),
129    };
130
131    Ok(ExtendedValue {
132        charset: charset,
133        language_tag: lang,
134        value: value,
135    })
136}
137
138
139impl Display for ExtendedValue {
140    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
141        let encoded_value =
142            percent_encoding::percent_encode(&self.value[..], self::percent_encoding_http::HTTP_VALUE);
143        if let Some(ref lang) = self.language_tag {
144            write!(f, "{}'{}'{}", self.charset, lang, encoded_value)
145        } else {
146            write!(f, "{}''{}", self.charset, encoded_value)
147        }
148    }
149}
150
151/// Percent encode a sequence of bytes with a character set defined in
152/// [https://tools.ietf.org/html/rfc5987#section-3.2][url]
153///
154/// [url]: https://tools.ietf.org/html/rfc5987#section-3.2
155pub fn http_percent_encode(f: &mut fmt::Formatter, bytes: &[u8]) -> fmt::Result {
156    let encoded = percent_encoding::percent_encode(bytes, self::percent_encoding_http::HTTP_VALUE);
157    fmt::Display::fmt(&encoded, f)
158}
159
160mod percent_encoding_http {
161    use percent_encoding;
162
163    // internal module because macro is hard-coded to make a public item
164    // but we don't want to public export this item
165    define_encode_set! {
166        // This encode set is used for HTTP header values and is defined at
167        // https://tools.ietf.org/html/rfc5987#section-3.2
168        pub HTTP_VALUE = [percent_encoding::SIMPLE_ENCODE_SET] | {
169            ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?',
170            '[', '\\', ']', '{', '}'
171        }
172    }
173}
174
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::{ExtendedValue, parse_extended_value};
    use language_tags::LanguageTag;

    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        // RFC 5987, Section 3.2.2: extended notation carrying U+00A3
        // (POUND SIGN) as the single ISO-8859-1 octet 0xA3.
        let english = "en".parse::<LanguageTag>().unwrap();
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates").unwrap();

        assert_eq!(parsed.charset, Charset::Iso_8859_1);
        assert_eq!(parsed.language_tag, Some(english));
        assert_eq!(parsed.value, b"\xA3 rates".to_vec());
    }

    #[test]
    fn test_parse_extended_value_with_encoding() {
        // RFC 5987, Section 3.2.2: extended notation carrying U+00A3
        // (POUND SIGN) and U+20AC (EURO SIGN) as UTF-8 octets.
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates").unwrap();

        assert_eq!(parsed.charset, Charset::Ext("UTF-8".to_string()));
        assert!(parsed.language_tag.is_none());
        assert_eq!(parsed.value, "\u{a3} and \u{20ac} rates".as_bytes().to_vec());
    }

    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted() {
        // Only one quote: there is no third piece to use as the value.
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        // The second piece is blank and the third piece is absent.
        assert!(parse_extended_value("blank second part'").is_err());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding_and_language_tag() {
        let english: LanguageTag = "en".parse().expect("Could not parse language tag");
        let pound_rates = ExtendedValue {
            charset: Charset::Iso_8859_1,
            language_tag: Some(english),
            value: b"\xA3 rates".to_vec(),
        };

        assert_eq!(pound_rates.to_string(), "ISO-8859-1'en'%A3%20rates");
    }

    #[test]
    fn test_fmt_extended_value_with_encoding() {
        let pound_and_euro_rates = ExtendedValue {
            charset: Charset::Ext("UTF-8".to_string()),
            language_tag: None,
            value: "\u{a3} and \u{20ac} rates".as_bytes().to_vec(),
        };

        assert_eq!(pound_and_euro_rates.to_string(),
                   "UTF-8''%C2%A3%20and%20%E2%82%AC%20rates");
    }
}