rfc2047_decoder/decoder.rs
1use std::result;
2use thiserror::Error;
3
4use crate::{evaluator, lexer, parser};
5
6/// The possible errors which can occur while parsing the string.
7#[derive(Error, Debug, PartialEq)]
8pub enum Error {
9 /// Symbolises that an error occured in the lexer.
10 #[error(transparent)]
11 Lexer(#[from] lexer::Error),
12
13 /// Symbolises that an error occured in the parser.
14 #[error(transparent)]
15 Parser(#[from] parser::Error),
16
17 /// Symbolises that an error occured in the evaluator.
18 #[error(transparent)]
19 Evaluator(#[from] evaluator::Error),
20}
21
/// Selects how the decoder reacts to an encoded word that is not encoded the
/// way the RFC describes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum RecoverStrategy {
    /// Decode the encoded word even though it is incorrectly encoded.
    ///
    /// # Example
    /// See [the `RecoverStrategy::Decode` example on `Decoder`](Decoder#recoverstrategydecode).
    Decode,

    /// Skip the incorrectly encoded word.
    ///
    /// # Example
    /// See [the `RecoverStrategy::Skip` example on `Decoder`](Decoder#recoverstrategyskip).
    Skip,

    /// Stop parsing the string and return an error.
    ///
    /// # Example
    /// See [the `RecoverStrategy::Abort` example on `Decoder`](Decoder#recoverstrategyabort-default).
    Abort,
}
44
45type Result<T> = result::Result<T, Error>;
46
47/// Represents the decoder builder.
48///
49/// # Example
50/// ```
51/// use rfc2047_decoder::{Decoder, RecoverStrategy};
52///
53/// let decoder = Decoder::new()
54/// .too_long_encoded_word_strategy(RecoverStrategy::Skip);
55/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap();
56///
57/// assert_eq!(decoded_str, "str");
58/// ```
59#[derive(Debug, Clone, Eq, PartialEq)]
60pub struct Decoder {
61 /// Determines which strategy should be used, if the parser encounters
62 /// encoded words which are longer than allowed in the RFC (it's longer than 75 chars).
63 pub too_long_encoded_word: RecoverStrategy,
64}
65
66impl Decoder {
67 /// Equals [Decoder::default].
68 pub fn new() -> Self {
69 Self::default()
70 }
71
72 /// Set the strategy if the decoder finds an encoded word which is too long.
73 ///
74 /// # Examples
75 ///
76 /// Each example uses the same encoded message:
77 /// ```txt
78 /// =?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=
79 /// ```
80 /// which exceeds the maximum length of 75 chars so it's actually invalid.
81 ///
82 /// ## RecoverStrategy::Skip
83 /// Skips the invalid encoded word and parses it as clear text.
84 ///
85 /// ```rust
86 /// use rfc2047_decoder::{Decoder, RecoverStrategy};
87 ///
88 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
89 /// let decoder = Decoder::new()
90 /// .too_long_encoded_word_strategy(RecoverStrategy::Skip);
91 ///
92 /// let parsed = decoder.decode(message).unwrap();
93 ///
94 /// // nothing changed!
95 /// assert_eq!(parsed, message);
96 /// ```
97 ///
98 /// ## RecoverStrategy::Decode
99 /// Although the encoded word is invalid, keep decoding it.
100 ///
101 /// ```rust
102 /// use rfc2047_decoder::{Decoder, RecoverStrategy};
103 ///
104 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
105 /// let decoder = Decoder::new()
106 /// .too_long_encoded_word_strategy(RecoverStrategy::Decode);
107 ///
108 /// let parsed = decoder.decode(message).unwrap();
109 ///
110 /// // could you decode it? ;)
111 /// let expected_result = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare.";
112 ///
113 /// assert_eq!(parsed, expected_result);
114 /// ```
115 ///
116 /// ## RecoverStrategy::Abort (default)
117 /// The parser will return an `Err` and collects all encoded words which are
118 /// too long. You can use them afterwards for error messages for example.
119 ///
120 /// ```rust
121 /// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}};
122 /// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError;
123 /// use rfc2047_decoder::TooLongEncodedWords;
124 ///
125 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
126 /// // `RecoverStrategy::Abort` is the default strategy
127 /// let decoder = Decoder::new();
128 ///
129 /// let parsed = decoder.decode(message);
130 ///
131 /// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=".to_string()])))));
132 /// ```
133 pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self {
134 self.too_long_encoded_word = strategy;
135 self
136 }
137
138 /// Decodes the given RFC 2047 MIME Message Header encoded string.
139 pub fn decode<T: AsRef<[u8]>>(self, encoded_str: T) -> Result<String> {
140 let text_tokens = lexer::run(encoded_str.as_ref(), self)?;
141 let parsed_text = parser::run(text_tokens)?;
142 let evaluated_string = evaluator::run(parsed_text)?;
143
144 Ok(evaluated_string)
145 }
146}
147
148impl Default for Decoder {
149 /// Returns the decoder with the following default "settings":
150 ///
151 /// - `too_long_encoded_word`: [RecoverStrategy::Abort]
152 fn default() -> Self {
153 Self {
154 too_long_encoded_word: RecoverStrategy::Abort,
155 }
156 }
157}
158
#[cfg(test)]
mod tests {
    /// The main tests, taken from the example table in RFC 2047, section 8:
    /// <https://datatracker.ietf.org/doc/html/rfc2047#section-8>
    /// Scroll down until you see the table.
    mod rfc_tests {
        use crate::decode;

        /// A single encoded word decodes to its payload.
        #[test]
        fn decode_encoded_word_single_char() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?=").unwrap(), "a");
        }

        /// Whitespace between an encoded word and clear text is kept.
        #[test]
        fn decode_encoded_word_separated_by_whitespace() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?= b").unwrap(), "a b");
        }

        /// A single space between two encoded words is dropped.
        #[test]
        fn decode_two_encoded_chars() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        /// Several spaces between two encoded words are dropped as well.
        ///
        /// The input uses two spaces on purpose: with a single space this test
        /// would be identical to `decode_two_encoded_chars` and the
        /// multi-space row of the RFC table would not be covered.
        #[test]
        fn whitespace_between_two_encoded_words_should_be_ignored() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        /// A line break followed by a space between two encoded words is
        /// dropped.
        #[test]
        fn whitespace_chars_between_two_encoded_words_should_be_ignored() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        /// In the Q encoding an underscore stands for a space.
        #[test]
        fn whitespace_encoded_in_encoded_word() {
            assert_eq!(decode("=?ISO-8859-1?Q?a_b?=").unwrap(), "a b");
        }

        /// The separating space is dropped, the encoded one (`_`) is kept.
        #[test]
        fn ignore_whitespace_between_two_encoded_words_but_not_the_encoded_whitespace() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=").unwrap(),
                "a b"
            );
        }
    }

    /// Additional, project-specific tests.
    mod custom_tests {
        use crate::decode;

        #[test]
        fn clear_empty() {
            assert_eq!(decode("").unwrap(), "");
        }

        #[test]
        fn clear_with_spaces() {
            assert_eq!(decode("str with spaces").unwrap(), "str with spaces");
        }

        // NOTE(review): despite its name this feeds a plain empty string, which
        // makes it identical to `clear_empty`. An empty Q-encoded word
        // (`=?UTF-8?Q??=`) was presumably intended — confirm that the lexer
        // accepts it before changing the input.
        #[test]
        fn utf8_qs_empty() {
            assert_eq!(decode("").unwrap(), "");
        }

        #[test]
        fn utf8_qs_with_str() {
            assert_eq!(decode("=?UTF-8?Q?str?=").unwrap(), "str");
        }

        #[test]
        fn utf8_qs_with_spaces() {
            assert_eq!(
                decode("=?utf8?q?str_with_spaces?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_qs_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=").unwrap(),
                "str with special çhàrß"
            );
        }

        /// Two adjacent encoded words are concatenated no matter whether they
        /// are separated by CRLF + space, LF + space, a space, or nothing.
        #[test]
        fn utf8_qs_double() {
            assert_eq!(
                decode("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(
                decode("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(decode("=?UTF-8?Q?str?= =?UTF-8?Q?str?=").unwrap(), "strstr");
            assert_eq!(decode("=?UTF-8?Q?str?==?UTF-8?Q?str?=").unwrap(), "strstr");
        }

        #[test]
        fn utf8_b64_empty() {
            assert_eq!(decode("=?UTF-8?B??=").unwrap(), "");
        }

        #[test]
        fn utf8_b64_with_str() {
            assert_eq!(decode("=?UTF-8?B?c3Ry?=").unwrap(), "str");
        }

        #[test]
        fn utf8_b64_with_spaces() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_b64_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=").unwrap(),
                "str with special çhàrß"
            );
        }

        /// The last base64 symbol before the padding carries non-zero
        /// trailing bits; the payload must still decode.
        #[test]
        fn utf8_b64_trailing_bit() {
            assert_eq!(
                decode("=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=").unwrap(),
                "Portale HackingTeam",
            );
        }
    }
}