Skip to main content

rfc2047_decoder/
decoder.rs

1use crate::{evaluator, lexer, parser};
2use thiserror::Error;
3
4/// The possible errors which can occur while parsing the string.
5#[derive(Error, Debug, PartialEq)]
6pub enum Error {
7    /// Symbolises that an error occured in the lexer.
8    #[error(transparent)]
9    Lexer(#[from] lexer::Error),
10
11    /// Symbolises that an error occured in the parser.
12    #[error(transparent)]
13    Parser(#[from] parser::Error),
14
15    /// Symbolises that an error occured in the evaluator.
16    #[error(transparent)]
17    Evaluator(#[from] evaluator::Error),
18}
19
/// Selects how the decoder reacts to an encoded word that is not encoded the
/// way the RFC describes.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RecoverStrategy {
    /// Decode the encoded word even though it is incorrectly encoded.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Decode](Decoder#recoverstrategydecode).
    Decode,

    /// Skip the incorrectly encoded word.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Skip](Decoder#recoverstrategyskip).
    Skip,

    /// Abort parsing of the string and return an error.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Abort](Decoder#recoverstrategyabort-default).
    Abort,
}
42
43/// Represents the decoder builder.
44///
45/// # Example
46/// ```
47/// use rfc2047_decoder::{Decoder, RecoverStrategy};
48///
49/// let decoder = Decoder::new()
50///                 .too_long_encoded_word_strategy(RecoverStrategy::Skip);
51/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap();
52///
53/// assert_eq!(decoded_str, "str");
54/// ```
55#[derive(Debug, Clone, Eq, PartialEq)]
56pub struct Decoder {
57    /// Determines which strategy should be used, if the parser encounters
58    /// encoded words which are longer than allowed in the RFC (it's longer than 75 chars).
59    pub too_long_encoded_word: RecoverStrategy,
60}
61
62impl Decoder {
63    /// Equals [Decoder::default].
64    pub fn new() -> Self {
65        Self::default()
66    }
67
68    /// Set the strategy if the decoder finds an encoded word which is too long.
69    ///
70    /// # Examples
71    ///
72    /// Each example uses the same encoded message:
73    /// ```txt
74    /// =?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=
75    /// ```
76    /// which exceeds the maximum length of 75 chars so it's actually invalid.
77    ///
78    /// ## RecoverStrategy::Skip
79    /// Skips the invalid encoded word and parses it as clear text.
80    ///
81    /// ```rust
82    /// use rfc2047_decoder::{Decoder, RecoverStrategy};
83    ///
84    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
85    /// let decoder = Decoder::new()
86    ///                 .too_long_encoded_word_strategy(RecoverStrategy::Skip);
87    ///
88    /// let parsed = decoder.decode(message).unwrap();
89    ///
90    /// // nothing changed!
91    /// assert_eq!(parsed, message);
92    /// ```
93    ///
94    /// ## RecoverStrategy::Decode
95    /// Although the encoded word is invalid, keep decoding it.
96    ///
97    /// ```rust
98    /// use rfc2047_decoder::{Decoder, RecoverStrategy};
99    ///
100    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
101    /// let decoder = Decoder::new()
102    ///                 .too_long_encoded_word_strategy(RecoverStrategy::Decode);
103    ///
104    /// let parsed = decoder.decode(message).unwrap();
105    ///
106    /// // could you decode it? ;)
107    /// let expected_result = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare.";
108    ///
109    /// assert_eq!(parsed, expected_result);
110    /// ```
111    ///
112    /// ## RecoverStrategy::Abort (default)
113    /// The parser will return an `Err` and collects all encoded words which are
114    /// too long. You can use them afterwards for error messages for example.
115    ///
116    /// ```rust
117    /// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}};
118    /// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError;
119    /// use rfc2047_decoder::TooLongEncodedWords;
120    ///
121    /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
122    /// // `RecoverStrategy::Abort` is the default strategy
123    /// let decoder = Decoder::new();
124    ///
125    /// let parsed = decoder.decode(message);
126    ///
127    /// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=".to_string()])))));
128    /// ```
129    pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self {
130        self.too_long_encoded_word = strategy;
131        self
132    }
133
134    /// Decodes the given RFC 2047 MIME Message Header encoded string.
135    pub fn decode<T: AsRef<[u8]>>(self, encoded_str: T) -> Result<String, Error> {
136        let text_tokens = lexer::run(encoded_str.as_ref(), self.too_long_encoded_word)?;
137        let parsed_text = parser::run(text_tokens)?;
138        let evaluated_string = evaluator::run(parsed_text)?;
139
140        Ok(evaluated_string)
141    }
142}
143
144impl Default for Decoder {
145    /// Returns the decoder with the following default "settings":
146    ///
147    /// - `too_long_encoded_word`: [RecoverStrategy::Abort]
148    fn default() -> Self {
149        Self {
150            too_long_encoded_word: RecoverStrategy::Abort,
151        }
152    }
153}
154
155#[cfg(test)]
156mod tests {
157    /// Here are the main-tests which are listed here:
158    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
159    /// Scroll down until you see the table.
160    mod rfc_tests {
161        use crate::decode;
162
163        #[test]
164        fn decode_encoded_word_single_char() {
165            assert_eq!(decode("=?ISO-8859-1?Q?a?=").unwrap(), "a");
166        }
167
168        #[test]
169        fn decode_encoded_word_separated_by_whitespace() {
170            assert_eq!(decode("=?ISO-8859-1?Q?a?= b").unwrap(), "a b");
171        }
172
173        #[test]
174        fn decode_two_encoded_chars() {
175            assert_eq!(
176                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(),
177                "ab"
178            );
179        }
180
181        #[test]
182        fn whitespace_between_two_encoded_words_should_be_ignored() {
183            assert_eq!(
184                decode("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=").unwrap(),
185                "ab"
186            );
187        }
188
189        #[test]
190        fn whitespace_chars_between_two_encoded_words_should_be_ignored() {
191            assert_eq!(
192                decode(
193                    "=?ISO-8859-1?Q?a?=               
194                     =?ISO-8859-1?Q?b?="
195                )
196                .unwrap(),
197                "ab"
198            );
199        }
200
201        #[test]
202        fn whitespace_encoded_in_encoded_word() {
203            assert_eq!(decode("=?ISO-8859-1?Q?a_b?=").unwrap(), "a b");
204        }
205
206        #[test]
207        fn ignore_whitespace_between_two_encoded_words_but_not_the_encoded_whitespace() {
208            assert_eq!(
209                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=").unwrap(),
210                "a b"
211            );
212        }
213    }
214
215    /// Those are some custom tests
216    mod custom_tests {
217        use crate::decode;
218
219        #[test]
220        fn clear_empty() {
221            assert_eq!(decode("").unwrap(), "");
222        }
223
224        #[test]
225        fn clear_with_spaces() {
226            assert_eq!(decode("str with spaces").unwrap(), "str with spaces");
227        }
228
229        #[test]
230        fn utf8_qs_empty() {
231            assert_eq!(decode("").unwrap(), "");
232        }
233
234        #[test]
235        fn utf8_qs_with_str() {
236            assert_eq!(decode("=?UTF-8?Q?str?=").unwrap(), "str");
237        }
238
239        #[test]
240        fn utf8_qs_with_spaces() {
241            assert_eq!(
242                decode("=?utf8?q?str_with_spaces?=").unwrap(),
243                "str with spaces"
244            );
245        }
246
247        #[test]
248        fn utf8_qs_with_spec_chars() {
249            assert_eq!(
250                decode("=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=").unwrap(),
251                "str with special çhàrß"
252            );
253        }
254
255        #[test]
256        fn utf8_qs_double() {
257            assert_eq!(
258                decode("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=").unwrap(),
259                "strstr"
260            );
261            assert_eq!(
262                decode("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=").unwrap(),
263                "strstr"
264            );
265            assert_eq!(decode("=?UTF-8?Q?str?= =?UTF-8?Q?str?=").unwrap(), "strstr");
266            assert_eq!(decode("=?UTF-8?Q?str?==?UTF-8?Q?str?=").unwrap(), "strstr");
267        }
268
269        #[test]
270        fn utf8_b64_empty() {
271            assert_eq!(decode("=?UTF-8?B??=").unwrap(), "");
272        }
273
274        #[test]
275        fn utf8_b64_with_str() {
276            assert_eq!(decode("=?UTF-8?B?c3Ry?=").unwrap(), "str");
277        }
278
279        #[test]
280        fn utf8_b64_with_spaces() {
281            assert_eq!(
282                decode("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=").unwrap(),
283                "str with spaces"
284            );
285        }
286
287        #[test]
288        fn utf8_b64_with_spec_chars() {
289            assert_eq!(
290                decode("=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=").unwrap(),
291                "str with special çhàrß"
292            );
293        }
294
295        #[test]
296        fn utf8_b64_trailing_bit() {
297            assert_eq!(
298                decode("=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=").unwrap(),
299                "Portale HackingTeam",
300            );
301        }
302    }
303}