rfc2047_decoder/
decoder.rs

1use std::result;
2use thiserror::Error;
3
4use crate::{evaluator, lexer, parser};
5
6/// The possible errors which can occur while parsing the string.
7#[derive(Error, Debug, PartialEq)]
8pub enum Error {
9    /// Symbolises that an error occured in the lexer.
10    #[error(transparent)]
11    Lexer(#[from] lexer::Error),
12
13    /// Symbolises that an error occured in the parser.
14    #[error(transparent)]
15    Parser(#[from] parser::Error),
16
17    /// Symbolises that an error occured in the evaluator.
18    #[error(transparent)]
19    Evaluator(#[from] evaluator::Error),
20}
21
/// Selects what happens when an encoded word is not encoded the way the RFC
/// describes.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RecoverStrategy {
    /// Decode the encoded word anyway, even though it is incorrectly encoded.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Decode](Decoder#recoverstrategydecode).
    Decode,

    /// Leave the incorrectly encoded word undecoded and move on.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Skip](Decoder#recoverstrategyskip).
    Skip,

    /// Stop parsing the string and return an error.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Abort](Decoder#recoverstrategyabort-default).
    Abort,
}
44
/// Module-local shorthand for a [`result::Result`] whose error type is [`Error`].
type Result<T> = result::Result<T, Error>;
46
47/// Represents the decoder builder.
48///
49/// # Example
50/// ```
51/// use rfc2047_decoder::{Decoder, RecoverStrategy};
52///
53/// let decoder = Decoder::new()
54///                 .too_long_encoded_word_strategy(RecoverStrategy::Skip);
55/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap();
56///
57/// assert_eq!(decoded_str, "str");
58/// ```
59#[derive(Debug, Clone, Eq, PartialEq)]
60pub struct Decoder {
61    /// Determines which strategy should be used, if the parser encounters
62    /// encoded words which are longer than allowed in the RFC (it's longer than 75 chars).
63    pub too_long_encoded_word: RecoverStrategy,
64}
65
66impl Decoder {
67    /// Equals [Decoder::default].
68    pub fn new() -> Self {
69        Self::default()
70    }
71
72    /// Set the strategy if the decoder finds an encoded word which is too long.
73    ///
74    /// # Examples
75    ///
76    /// Each example uses the same encoded message:
77    /// ```txt
78    /// =?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=
79    /// ```
80    /// which exceeds the maximum length of 75 chars so it's actually invalid.
81    ///
82    /// ## RecoverStrategy::Skip
83    /// Skips the invalid encoded word and parses it as clear text.
84    ///
85    /// ```rust
86    /// use rfc2047_decoder::{Decoder, RecoverStrategy};
87    ///
88    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
89    /// let decoder = Decoder::new()
90    ///                 .too_long_encoded_word_strategy(RecoverStrategy::Skip);
91    ///
92    /// let parsed = decoder.decode(message).unwrap();
93    ///
94    /// // nothing changed!
95    /// assert_eq!(parsed, message);
96    /// ```
97    ///
98    /// ## RecoverStrategy::Decode
99    /// Although the encoded word is invalid, keep decoding it.
100    ///
101    /// ```rust
102    /// use rfc2047_decoder::{Decoder, RecoverStrategy};
103    ///
104    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
105    /// let decoder = Decoder::new()
106    ///                 .too_long_encoded_word_strategy(RecoverStrategy::Decode);
107    ///
108    /// let parsed = decoder.decode(message).unwrap();
109    ///
110    /// // could you decode it? ;)
111    /// let expected_result = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare.";
112    ///
113    /// assert_eq!(parsed, expected_result);
114    /// ```
115    ///
116    /// ## RecoverStrategy::Abort (default)
117    /// The parser will return an `Err` and collects all encoded words which are
118    /// too long. You can use them afterwards for error messages for example.
119    ///
120    /// ```rust
121    /// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}};
122    /// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError;
123    /// use rfc2047_decoder::TooLongEncodedWords;
124    ///
125    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
126    /// // `RecoverStrategy::Abort` is the default strategy
127    /// let decoder = Decoder::new();
128    ///
129    /// let parsed = decoder.decode(message);
130    ///
131    /// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=".to_string()])))));
132    /// ```
133    pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self {
134        self.too_long_encoded_word = strategy;
135        self
136    }
137
138    /// Decodes the given RFC 2047 MIME Message Header encoded string.
139    pub fn decode<T: AsRef<[u8]>>(self, encoded_str: T) -> Result<String> {
140        let text_tokens = lexer::run(encoded_str.as_ref(), self)?;
141        let parsed_text = parser::run(text_tokens)?;
142        let evaluated_string = evaluator::run(parsed_text)?;
143
144        Ok(evaluated_string)
145    }
146}
147
148impl Default for Decoder {
149    /// Returns the decoder with the following default "settings":
150    ///
151    /// - `too_long_encoded_word`: [RecoverStrategy::Abort]
152    fn default() -> Self {
153        Self {
154            too_long_encoded_word: RecoverStrategy::Abort,
155        }
156    }
157}
158
#[cfg(test)]
mod tests {
    use crate::decode;

    /// Decodes `encoded` with the crate-level `decode` function and asserts
    /// that decoding succeeds and yields exactly `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test line
    /// instead of at this helper.
    #[track_caller]
    fn assert_decodes_to(encoded: &str, expected: &str) {
        let decoded = decode(encoded).unwrap();
        assert_eq!(decoded, expected);
    }

    /// The main tests, taken from the table of examples in
    /// <https://datatracker.ietf.org/doc/html/rfc2047#section-8>
    /// (scroll down until you see the table).
    mod rfc_tests {
        use super::assert_decodes_to;

        #[test]
        fn decode_encoded_word_single_char() {
            assert_decodes_to("=?ISO-8859-1?Q?a?=", "a");
        }

        #[test]
        fn decode_encoded_word_separated_by_whitespace() {
            assert_decodes_to("=?ISO-8859-1?Q?a?= b", "a b");
        }

        #[test]
        fn decode_two_encoded_chars() {
            assert_decodes_to("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=", "ab");
        }

        #[test]
        fn whitespace_between_two_encoded_words_should_be_ignored() {
            assert_decodes_to("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=", "ab");
        }

        #[test]
        fn whitespace_chars_between_two_encoded_words_should_be_ignored() {
            // Trailing spaces, a line break and leading spaces separate the
            // two encoded words; the whitespace is written with an explicit
            // `\n` so that it cannot be lost to editor whitespace trimming.
            assert_decodes_to(
                "=?ISO-8859-1?Q?a?=               \n                     =?ISO-8859-1?Q?b?=",
                "ab",
            );
        }

        #[test]
        fn whitespace_encoded_in_encoded_word() {
            assert_decodes_to("=?ISO-8859-1?Q?a_b?=", "a b");
        }

        #[test]
        fn ignore_whitespace_between_two_encoded_words_but_not_the_encoded_whitespace() {
            assert_decodes_to("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=", "a b");
        }
    }

    /// Additional, crate-specific tests.
    mod custom_tests {
        use super::assert_decodes_to;

        #[test]
        fn clear_empty() {
            assert_decodes_to("", "");
        }

        #[test]
        fn clear_with_spaces() {
            assert_decodes_to("str with spaces", "str with spaces");
        }

        #[test]
        fn utf8_qs_empty() {
            // NOTE(review): same input as `clear_empty`; presumably an empty
            // Q-encoded word was meant to be decoded here. Confirm before
            // changing the input.
            assert_decodes_to("", "");
        }

        #[test]
        fn utf8_qs_with_str() {
            assert_decodes_to("=?UTF-8?Q?str?=", "str");
        }

        #[test]
        fn utf8_qs_with_spaces() {
            assert_decodes_to("=?utf8?q?str_with_spaces?=", "str with spaces");
        }

        #[test]
        fn utf8_qs_with_spec_chars() {
            assert_decodes_to(
                "=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=",
                "str with special çhàrß",
            );
        }

        #[test]
        fn utf8_qs_double() {
            // Separators between the two words: CRLF + space, LF + space,
            // a single space, and nothing at all.
            assert_decodes_to("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=", "strstr");
            assert_decodes_to("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=", "strstr");
            assert_decodes_to("=?UTF-8?Q?str?= =?UTF-8?Q?str?=", "strstr");
            assert_decodes_to("=?UTF-8?Q?str?==?UTF-8?Q?str?=", "strstr");
        }

        #[test]
        fn utf8_b64_empty() {
            assert_decodes_to("=?UTF-8?B??=", "");
        }

        #[test]
        fn utf8_b64_with_str() {
            assert_decodes_to("=?UTF-8?B?c3Ry?=", "str");
        }

        #[test]
        fn utf8_b64_with_spaces() {
            assert_decodes_to("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=", "str with spaces");
        }

        #[test]
        fn utf8_b64_with_spec_chars() {
            assert_decodes_to(
                "=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=",
                "str with special çhàrß",
            );
        }

        #[test]
        fn utf8_b64_trailing_bit() {
            assert_decodes_to(
                "=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=",
                "Portale HackingTeam",
            );
        }
    }
}