mco_http/header/
parsing.rs

1//! Utility functions for Header implementations.
2
3use language_tags::LanguageTag;
4use std::str;
5use std::str::FromStr;
6use std::fmt::{self, Display};
7use url::percent_encoding;
8
9use crate::header::shared::Charset;
10
11/// Reads a single raw string when parsing a header.
12pub fn from_one_raw_str<T: str::FromStr>(raw: &[Vec<u8>]) -> crate::Result<T> {
13    if raw.len() != 1 || unsafe { raw.get_unchecked(0) } == b"" { return Err(crate::Error::Header) }
14    // we JUST checked that raw.len() == 1, so raw[0] WILL exist.
15    from_raw_str(& unsafe { raw.get_unchecked(0) })
16}
17
18/// Reads a raw string into a value.
19pub fn from_raw_str<T: str::FromStr>(raw: &[u8]) -> crate::Result<T> {
20    let s = str::from_utf8(raw)?;
21    T::from_str(s).or(Err(crate::Error::Header))
22}
23
24/// Reads a comma-delimited raw header into a Vec.
25#[inline]
26pub fn from_comma_delimited<T: str::FromStr, S: AsRef<[u8]>>(raw: &[S]) -> crate::Result<Vec<T>> {
27    let mut result = Vec::new();
28    for s in raw {
29        let s = str::from_utf8(s.as_ref())?;
30        result.extend(s.split(',')
31                      .filter_map(|x| match x.trim() {
32                          "" => None,
33                          y => Some(y)
34                      })
35                      .filter_map(|x| x.parse().ok()))
36    }
37    Ok(result)
38}
39
/// Format an array into a comma-delimited string.
///
/// Writes every element of `parts` to `f` through its `Display`
/// implementation, placing `", "` between consecutive elements. Nothing is
/// written for an empty slice.
///
/// # Errors
///
/// Stops at, and returns, the first error reported by the formatter or by an
/// element's `Display` implementation.
pub fn fmt_comma_delimited<T: Display>(f: &mut fmt::Formatter, parts: &[T]) -> fmt::Result {
    let mut remaining = parts.iter();
    if let Some(first) = remaining.next() {
        // The first element carries no leading separator.
        Display::fmt(first, f)?;
        for part in remaining {
            f.write_str(", ")?;
            Display::fmt(part, f)?;
        }
    }
    Ok(())
}
50
51/// An extended header parameter value (i.e., tagged with a character set and optionally,
52/// a language), as defined in [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
53#[derive(Clone, Debug, PartialEq)]
54pub struct ExtendedValue {
55    /// The character set that is used to encode the `value` to a string.
56    pub charset: Charset,
57    /// The human language details of the `value`, if available.
58    pub language_tag: Option<LanguageTag>,
59    /// The parameter value, as expressed in octets.
60    pub value: Vec<u8>,
61}
62
63/// Parses extended header parameter values (`ext-value`), as defined in
64/// [RFC 5987](https://tools.ietf.org/html/rfc5987#section-3.2).
65///
66/// Extended values are denoted by parameter names that end with `*`.
67///
68/// ## ABNF
69/// ```plain
70/// ext-value     = charset  "'" [ language ] "'" value-chars
71///               ; like RFC 2231's <extended-initial-value>
72///               ; (see [RFC2231], Section 7)
73///
74/// charset       = "UTF-8" / "ISO-8859-1" / mime-charset
75///
76/// mime-charset  = 1*mime-charsetc
77/// mime-charsetc = ALPHA / DIGIT
78///               / "!" / "#" / "$" / "%" / "&"
79///               / "+" / "-" / "^" / "_" / "`"
80///               / "{" / "}" / "~"
81///               ; as <mime-charset> in Section 2.3 of [RFC2978]
82///               ; except that the single quote is not included
83///               ; SHOULD be registered in the IANA charset registry
84///
85/// language      = <Language-Tag, defined in [RFC5646], Section 2.1>
86///
87/// value-chars   = *( pct-encoded / attr-char )
88///
89/// pct-encoded   = "%" HEXDIG HEXDIG
90///               ; see [RFC3986], Section 2.1
91///
92/// attr-char     = ALPHA / DIGIT
93///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
94///               / "^" / "_" / "`" / "|" / "~"
95///               ; token except ( "*" / "'" / "%" )
96/// ```
97pub fn parse_extended_value(val: &str) -> crate::Result<ExtendedValue> {
98
99    // Break into three pieces separated by the single-quote character
100    let mut parts = val.splitn(3,'\'');
101
102    // Interpret the first piece as a Charset
103    let charset: Charset = match parts.next() {
104        None => return Err(crate::Error::Header),
105        Some(n) => FromStr::from_str(n)?,
106    };
107
108    // Interpret the second piece as a language tag
109    let lang: Option<LanguageTag> = match parts.next() {
110        None => return Err(crate::Error::Header),
111        Some("") => None,
112        Some(s) => match s.parse() {
113            Ok(lt) => Some(lt),
114            Err(_) => return Err(crate::Error::Header),
115        }
116    };
117
118    // Interpret the third piece as a sequence of value characters
119    let value: Vec<u8> = match parts.next() {
120        None => return Err(crate::Error::Header),
121        Some(v) => percent_encoding::percent_decode(v.as_bytes()).collect(),
122    };
123
124    Ok(ExtendedValue {
125        charset: charset,
126        language_tag: lang,
127        value: value,
128    })
129}
130
131define_encode_set! {
132    /// This encode set is used for HTTP header values and is defined at
133    /// https://tools.ietf.org/html/rfc5987#section-3.2
134    pub HTTP_VALUE = [percent_encoding::SIMPLE_ENCODE_SET] | {
135        ' ', '"', '%', '\'', '(', ')', '*', ',', '/', ':', ';', '<', '-', '>', '?',
136        '[', '\\', ']', '{', '}'
137    }
138}
139
140impl Display for ExtendedValue {
141    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142        let encoded_value =
143            percent_encoding::percent_encode(&self.value[..], HTTP_VALUE);
144        if let Some(ref lang) = self.language_tag {
145            write!(f, "{}'{}'{}", self.charset, lang, encoded_value)
146        } else {
147            write!(f, "{}''{}", self.charset, encoded_value)
148        }
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::{parse_extended_value, ExtendedValue};
    use crate::header::shared::Charset;

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode character
    /// U+00A3 (POUND SIGN), with an explicit language tag.
    #[test]
    fn test_parse_extended_value_with_encoding_and_language_tag() {
        let expected_language_tag = langtag!(en);
        let parsed = parse_extended_value("iso-8859-1'en'%A3%20rates");
        assert!(parsed.is_ok());
        let parsed = parsed.unwrap();
        assert_eq!(Charset::Iso_8859_1, parsed.charset);
        assert_eq!(Some(expected_language_tag), parsed.language_tag);
        assert_eq!(b"\xA3 rates".to_vec(), parsed.value);
    }

    /// RFC 5987, Section 3.2.2: extended notation using the Unicode characters
    /// U+00A3 (POUND SIGN) and U+20AC (EURO SIGN), without a language tag.
    #[test]
    fn test_parse_extended_value_with_encoding() {
        let parsed = parse_extended_value("UTF-8''%c2%a3%20and%20%e2%82%ac%20rates");
        assert!(parsed.is_ok());
        let parsed = parsed.unwrap();
        assert_eq!(Charset::Ext(String::from("UTF-8")), parsed.charset);
        assert_eq!(None, parsed.language_tag);
        assert_eq!(b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(), parsed.value);
    }

    /// From: https://greenbytes.de/tech/tc2231/#attwithfn2231quot2
    #[test]
    fn test_parse_extended_value_missing_language_tag_and_encoding() {
        assert!(parse_extended_value("foo%20bar.html").is_err());
    }

    /// Only one quote: the value piece is missing entirely.
    #[test]
    fn test_parse_extended_value_partially_formatted() {
        assert!(parse_extended_value("UTF-8'missing third part").is_err());
    }

    /// One trailing quote: the language piece is blank and the value piece is missing.
    #[test]
    fn test_parse_extended_value_partially_formatted_blank() {
        assert!(parse_extended_value("blank second part'").is_err());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding_and_language_tag() {
        let language_tag = "en".parse().expect("Could not parse language tag");
        let extended_value = ExtendedValue {
            charset: Charset::Iso_8859_1,
            language_tag: Some(language_tag),
            value: b"\xA3 rates".to_vec(),
        };
        assert_eq!("ISO-8859-1'en'%A3%20rates", extended_value.to_string());
    }

    #[test]
    fn test_fmt_extended_value_with_encoding() {
        let extended_value = ExtendedValue {
            charset: Charset::Ext(String::from("UTF-8")),
            language_tag: None,
            value: b"\xC2\xA3 and \xE2\x82\xAC rates".to_vec(),
        };
        assert_eq!(
            "UTF-8''%C2%A3%20and%20%E2%82%AC%20rates",
            extended_value.to_string()
        );
    }
}