Skip to main content

email/
rfc2047.rs

1//! Module for decoding RFC 2047 strings
2// use for to_ascii_lowercase
3use base64::decode;
4
5use encoding::label::encoding_from_whatwg_label;
6use encoding::DecoderTrap;
7
8/// Decode an RFC 2047 string (`s`) into a Rust String.
9///
10/// Will accept either "Q" encoding (RFC 2047 Section 4.2) or
11/// "B" encoding (BASE64)
12/// [unstable]
13pub fn decode_rfc2047(s: &str) -> Option<String> {
14    let parts: Vec<&str> = s.split('?').collect();
15    if parts.len() != 5 || parts[0] != "=" || parts[4] != "=" {
16        None
17    } else {
18        let charset = parts[1].to_ascii_lowercase();
19        let encoding = parts[2].to_ascii_lowercase();
20        let content = parts[3];
21
22        let bytes = match &encoding[..] {
23            "q" => decode_q_encoding(content),
24            "b" => decode_base64_encoding(content),
25            _ => panic!("Unknown encoding type"),
26        };
27
28        // XXX: Relies on WHATWG labels, rather than MIME labels for
29        // charset. Consider adding mapping upstream.
30        let decoder = encoding_from_whatwg_label(&charset[..]);
31
32        match (bytes, decoder) {
33            (Ok(b), Some(d)) => d.decode(&b, DecoderTrap::Replace).ok(),
34            _ => None,
35        }
36    }
37}
38
/// Decode quoted-printable style text (`s`) into raw bytes.
///
/// - `=XX`, where `XX` is a two-character hexadecimal number, yields the
///   byte with that value.
/// - `=` directly followed by `\r\n` or by `\n` is a soft line break and
///   yields nothing.
/// - Every other character is appended as `c as u8`, so characters above
///   U+00FF are truncated to their low eight bits. `_` is passed through
///   unchanged.
///
/// # Errors
///
/// Returns a descriptive `Err` when the input ends in the middle of an
/// escape sequence, when `=\r` is not followed by `\n`, or when the two
/// characters after `=` are not a valid hexadecimal byte.
pub fn decode_q_encoding(s: &str) -> Result<Vec<u8>, String> {
    // The decoded output is never longer (in bytes) than the input.
    let mut result = Vec::with_capacity(s.len());
    let mut char_iter = s.chars();

    while let Some(c) = char_iter.next() {
        if c != '=' {
            // Deliberately kept as a plain cast: non-Latin-1 characters are
            // truncated exactly as before.
            result.push(c as u8);
            continue;
        }

        let first = match char_iter.next() {
            Some(c) => c,
            None => return Err("Unexpected end of input after '='".to_string()),
        };

        match first {
            '\r' => {
                // Possible continuation - the next character must be a newline.
                // Running out of input here is an error as well.
                match char_iter.next() {
                    Some('\n') => continue,
                    _ => return Err("Invalid line endings in text".to_string()),
                }
            }
            '\n' => continue, // treat unix line endings similar to CRLF
            _ => {}
        }

        let second = match char_iter.next() {
            Some(c) => c,
            None => {
                return Err(format!(
                    "Unexpected end of input in escape sequence '={}'",
                    first
                ))
            }
        };

        let mut hex_string = String::with_capacity(2);
        hex_string.push(first);
        hex_string.push(second);

        match u8::from_str_radix(&hex_string, 16) {
            Ok(char_val) => result.push(char_val),
            Err(e) => return Err(format!("'{}' is not a hex number: {}", hex_string, e)),
        }
    }

    Ok(result)
}
77
78fn decode_base64_encoding(s: &str) -> Result<Vec<u8>, String> {
79    match decode(s) {
80        Ok(bytes) => Ok(bytes),
81        Err(_) => Err("Failed to base64 decode".to_string()),
82    }
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed encoded-words decode to the expected text, for both the
    /// "Q" and "B" encodings and regardless of the encoding letter's case.
    #[test]
    fn test_decode() {
        // (encoded-word, expected decoded text)
        let cases: [(&str, &str); 3] = [
            ("=?ISO-8859-1?Q?Test=20text?=", "Test text"),
            ("=?ISO-8859-1?b?VGVzdCB0ZXh0?=", "Test text"),
            ("=?utf-8?b?44GT44KT44Gr44Gh44Gv44CC?=", "こんにちは。"),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(decode_rfc2047(input).unwrap(), expected.to_string());
        }
    }

    /// Soft line breaks (`=` followed by a line ending) are dropped from the
    /// decoded bytes.
    #[test]
    fn test_multiline_quoted_printable_decode() {
        // (encoded text, expected decoded bytes)
        let cases: [(&str, &[u8]); 2] = [
            // Test with CRLF line endings
            (
                "Python 2=2E=\r\n6",
                &[80, 121, 116, 104, 111, 110, 32, 50, 46, 54],
            ),
            // Test with Unix line endings
            (
                "Python 2=2E=\n6",
                &[80, 121, 116, 104, 111, 110, 32, 50, 46, 54],
            ),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(decode_q_encoding(input).unwrap(), expected.to_vec());
        }
    }

    /// Malformed or unsupported inputs yield `None`.
    #[test]
    fn test_decode_failure() {
        let bad_inputs: [&str; 3] = [
            // Invalid base64
            "=?ISO-8859-1?b?-?=",
            // Not valid RFC 2047
            "=?Doesn't end with equals",
            // Unknown charset
            "=?NOCHARSET?q?foo?=",
        ];

        for input in bad_inputs.iter() {
            println!("{}", input);
            assert!(decode_rfc2047(*input).is_none());
        }
    }
}