hyperx/header/
parsing.rs

1//! Utility functions for Header implementations.
2
3use language_tags::LanguageTag;
4use std::str;
5use std::str::FromStr;
6use std::fmt::{self, Display};
7use percent_encoding;
8
9use header::RawLike;
10use header::shared::Charset;
11
12/// Reads a single raw string when parsing a header.
13pub fn from_one_raw_str<'a, R, T>(raw: &'a R) -> ::Result<T>
14where R: RawLike<'a>, T: str::FromStr
15{
16    if let Some(line) = raw.one() {
17        if !line.is_empty() {
18            return from_raw_str(line)
19        }
20    }
21    Err(::Error::Header)
22}
23
24/// Reads a raw string into a value.
25pub fn from_raw_str<T: str::FromStr>(raw: &[u8]) -> ::Result<T> {
26    let s = str::from_utf8(raw)?.trim();
27    T::from_str(s).or(Err(::Error::Header))
28}
29
30/// Reads a comma-delimited raw header into a Vec.
31#[inline]
32pub fn from_comma_delimited<'a, R, T>(raw: &'a R) -> ::Result<Vec<T>>
33where R: RawLike<'a>, T: str::FromStr
34{
35    let mut result = Vec::new();
36    for s in raw.iter() {
37        let s = str::from_utf8(s.as_ref())?;
38        result.extend(s.split(',')
39                      .filter_map(|x| match x.trim() {
40                          "" => None,
41                          y => Some(y)
42                      })
43                      .filter_map(|x| x.trim().parse().ok()))
44    }
45    Ok(result)
46}
47
/// Writes `parts` to `f` as a list separated by `", "`.
///
/// Nothing is written for an empty slice; a single element is written
/// without any separator. The first formatting error is returned as-is.
pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> fmt::Result {
    for (index, part) in parts.iter().enumerate() {
        // The separator goes *between* elements, so skip it before the first.
        if index > 0 {
            f.write_str(", ")?;
        }
        Display::fmt(part, f)?;
    }
    Ok(())
}
60
61/// An extended header parameter value (i.e., tagged with a character set and optionally,
62/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
63#[derive(Clone, Debug, PartialEq)]
64pub struct ExtendedValue {
65    /// The character set that is used to encode the `value` to a string.
66    pub charset: Charset,
67    /// The human language details of the `value`, if available.
68    pub language_tag: Option<LanguageTag>,
69    /// The parameter value, as expressed in octets.
70    pub value: Vec<u8>,
71}
72
73/// Parses extended header parameter values (`ext-value`), as defined in
74/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
75///
76/// Extended values are denoted by parameter names that end with `*`.
77///
78/// ## ABNF
79///
80/// ```text
81/// ext-value     = charset  "'" [ language ] "'" value-chars
82///               ; like RFC 2231's <extended-initial-value>
83///               ; (see [RFC2231], Section 7)
84///
85/// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
86///
87/// mime-charset  = 1*mime-charsetc
88/// mime-charsetc = ALPHA / DIGIT
89///               / "!" / "#" / "$" / "%" / "&"
90///               / "+" / "-" / "^" / "_" / "`"
91///               / "{" / "}" / "~"
92///               ; as <mime-charset> in Section 2.3 of [RFC2978]
93///               ; except that the single quote is not included
94///               ; SHOULD be registered in the IANA charset registry
95///
96/// language      = <Language-Tag, defined in [RFC5646], Section 2.1>
97///
98/// value-chars   = *( pct-encoded / attr-char )
99///
100/// pct-encoded   = "%" HEXDIG HEXDIG
101///               ; see [RFC3986], Section 2.1
102///
103/// attr-char     = ALPHA / DIGIT
104///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
105///               / "^" / "_" / "`" / "|" / "~"
106///               ; token except ( "*" / "'" / "%" )
107/// ```
108pub fn parse_extended_value(val: &str) -> ::Result<ExtendedValue> {
109
110    // Break into three pieces separated by the single-quote character
111    let mut parts = val.splitn(3,'\'');
112
113    // Interpret the first piece as a Charset
114    let charset: Charset = match parts.next() {
115        None => return Err(::Error::Header),
116        Some(n) => FromStr::from_str(n)?,
117    };
118
119    // Interpret the second piece as a language tag
120    let lang: Option<LanguageTag> = match parts.next() {
121        None => return Err(::Error::Header),
122        Some("") => None,
123        Some(s) => match s.parse() {
124            Ok(lt) => Some(lt),
125            Err(_) => return Err(::Error::Header),
126        }
127    };
128
129    // Interpret the third piece as a sequence of value characters
130    let value: Vec<u8> = match parts.next() {
131        None => return Err(::Error::Header),
132        Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(),
133    };
134
135    Ok(ExtendedValue {
136        charset: charset,
137        language_tag: lang,
138        value: value,
139    })
140}
141
142impl Display for ExtendedValue {
143    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
144        let encoded_value =
145            percent_encoding::percent_encode(
146                &self.value[..],
147                self::percent_encoding_http::HTTP_VALUE
148            );
149        if let Some(ref lang) = self.language_tag {
150            write!(f, "{}'{}'{}", self.charset, lang, encoded_value)
151        } else {
152            write!(f, "{}''{}", self.charset, encoded_value)
153        }
154    }
155}
156
157/// Percent encode a sequence of bytes with a character set defined in
158/// [https://tools.ietf.org/html/rfc5987#section-3.2][url]
159///
160/// [url]: https://tools.ietf.org/html/rfc5987#section-3.2
161pub fn http_percent_encode(f: &mut fmt::Formatter, bytes: &[u8]) -> fmt::Result {
162    let encoded = percent_encoding::percent_encode(
163        bytes,
164        self::percent_encoding_http::HTTP_VALUE
165    );
166    fmt::Display::fmt(&encoded, f)
167}
168
169mod percent_encoding_http {
170    use percent_encoding::{AsciiSet, CONTROLS};
171
172    // This encode set is used for HTTP header values and is defined at
173    // https://tools.ietf.org/html/rfc5987#section-3.2
174    pub const HTTP_VALUE: &AsciiSet = &CONTROLS
175        .add(b' ') .add(b'"') .add(b'%') .add(b'\'') .add(b'(')  .add(b')')
176        .add(b'*') .add(b',') .add(b'/') .add(b':')  .add(b';')  .add(b'<')
177        .add(b'-') .add(b'>') .add(b'?') .add(b'[')  .add(b'\\') .add(b']')
178        .add(b'{') .add(b'}');
179}
180
#[cfg(test)]
mod tests {
    use header::shared::Charset;
    use super::{ExtendedValue, parse_extended_value};
    use language_tags::LanguageTag;

    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode character U+00A3 (POUND SIGN)
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates")
            .expect("well-formed ext-value should parse");
        let english: LanguageTag = "en".parse().unwrap();

        assert_eq!(Charset::Iso_8859_1, parsed.charset);
        assert_eq!(Some(english), parsed.language_tag);
        // 0xA3 is the ISO-8859-1 encoding of the pound sign.
        assert_eq!(b"\xA3 rates".to_vec(), parsed.value);
    }

    #[test]
    fn test_parse_extended_value_with_encoding() {
        // RFC 5987, Section 3.2.2
        // Extended notation, using the Unicode characters U+00A3 (POUND SIGN)
        // and U+20AC (EURO SIGN)
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates")
            .expect("well-formed ext-value should parse");

        assert_eq!(Charset::Ext("UTF-8".to_string()), parsed.charset);
        assert_eq!(None, parsed.language_tag);
        // The decoded octets are the UTF-8 encoding of the text below.
        assert_eq!("\u{a3} and \u{20ac} rates".as_bytes().to_vec(), parsed.value);
    }

    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        // From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted() {
        // Only one quote: the value part is missing.
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        // One quote with nothing after it: still no value part.
        assert!(parse_extended_value("blank second part'").is_err());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding_and_language_tag() {
        let extended_value = ExtendedValue {
            charset: Charset::Iso_8859_1,
            language_tag: Some("en".parse().expect("Could not parse language tag")),
            value: b"\xA3 rates".to_vec(),
        };
        assert_eq!("ISO-8859-1'en'%A3%20rates", extended_value.to_string());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding() {
        let extended_value = ExtendedValue {
            charset: Charset::Ext("UTF-8".to_string()),
            language_tag: None,
            value: "\u{a3} and \u{20ac} rates".as_bytes().to_vec(),
        };
        assert_eq!("UTF-8''%C2%A3%20and%20%E2%82%AC%20rates",
                   extended_value.to_string());
    }
}