rfc2047_decoder/decoder.rs
1use crate::{evaluator, lexer, parser};
2use thiserror::Error;
3
4/// The possible errors which can occur while parsing the string.
5#[derive(Error, Debug, PartialEq)]
6pub enum Error {
7 /// Symbolises that an error occured in the lexer.
8 #[error(transparent)]
9 Lexer(#[from] lexer::Error),
10
11 /// Symbolises that an error occured in the parser.
12 #[error(transparent)]
13 Parser(#[from] parser::Error),
14
15 /// Symbolises that an error occured in the evaluator.
16 #[error(transparent)]
17 Evaluator(#[from] evaluator::Error),
18}
19
/// Selects what the decoder does with an encoded word that isn't encoded the
/// way the RFC describes.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RecoverStrategy {
    /// Decode the encoded word even though it is incorrectly encoded.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Decode](Decoder#recoverstrategydecode).
    Decode,

    /// Skip the incorrectly encoded word.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Skip](Decoder#recoverstrategyskip).
    Skip,

    /// Abort parsing of the string and return an error.
    ///
    /// # Example
    /// See [Decoder#RecoverStrategy::Abort](Decoder#recoverstrategyabort-default).
    Abort,
}
42
43/// Represents the decoder builder.
44///
45/// # Example
46/// ```
47/// use rfc2047_decoder::{Decoder, RecoverStrategy};
48///
49/// let decoder = Decoder::new()
50/// .too_long_encoded_word_strategy(RecoverStrategy::Skip);
51/// let decoded_str = decoder.decode("=?UTF-8?B?c3Ry?=").unwrap();
52///
53/// assert_eq!(decoded_str, "str");
54/// ```
55#[derive(Debug, Clone, Eq, PartialEq)]
56pub struct Decoder {
57 /// Determines which strategy should be used, if the parser encounters
58 /// encoded words which are longer than allowed in the RFC (it's longer than 75 chars).
59 pub too_long_encoded_word: RecoverStrategy,
60}
61
62impl Decoder {
63 /// Equals [Decoder::default].
64 pub fn new() -> Self {
65 Self::default()
66 }
67
68 /// Set the strategy if the decoder finds an encoded word which is too long.
69 ///
70 /// # Examples
71 ///
72 /// Each example uses the same encoded message:
73 /// ```txt
74 /// =?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=
75 /// ```
76 /// which exceeds the maximum length of 75 chars so it's actually invalid.
77 ///
78 /// ## RecoverStrategy::Skip
79 /// Skips the invalid encoded word and parses it as clear text.
80 ///
81 /// ```rust
82 /// use rfc2047_decoder::{Decoder, RecoverStrategy};
83 ///
84 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
85 /// let decoder = Decoder::new()
86 /// .too_long_encoded_word_strategy(RecoverStrategy::Skip);
87 ///
88 /// let parsed = decoder.decode(message).unwrap();
89 ///
90 /// // nothing changed!
91 /// assert_eq!(parsed, message);
92 /// ```
93 ///
94 /// ## RecoverStrategy::Decode
95 /// Although the encoded word is invalid, keep decoding it.
96 ///
97 /// ```rust
98 /// use rfc2047_decoder::{Decoder, RecoverStrategy};
99 ///
100 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
101 /// let decoder = Decoder::new()
102 /// .too_long_encoded_word_strategy(RecoverStrategy::Decode);
103 ///
104 /// let parsed = decoder.decode(message).unwrap();
105 ///
106 /// // could you decode it? ;)
107 /// let expected_result = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut interdum quam eu facilisis ornare.";
108 ///
109 /// assert_eq!(parsed, expected_result);
110 /// ```
111 ///
112 /// ## RecoverStrategy::Abort (default)
113 /// The parser will return an `Err` and collects all encoded words which are
114 /// too long. You can use them afterwards for error messages for example.
115 ///
116 /// ```rust
117 /// use rfc2047_decoder::{Decoder, RecoverStrategy, Error::{self, Lexer}};
118 /// use rfc2047_decoder::LexerError::ParseEncodedWordTooLongError;
119 /// use rfc2047_decoder::TooLongEncodedWords;
120 ///
121 /// let message = "=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=";
122 /// // `RecoverStrategy::Abort` is the default strategy
123 /// let decoder = Decoder::new();
124 ///
125 /// let parsed = decoder.decode(message);
126 ///
127 /// assert_eq!(parsed, Err(Lexer(ParseEncodedWordTooLongError(TooLongEncodedWords(vec!["=?utf-8?B?TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4gVXQgaW50ZXJkdW0gcXVhbSBldSBmYWNpbGlzaXMgb3JuYXJlLg==?=".to_string()])))));
128 /// ```
129 pub fn too_long_encoded_word_strategy(mut self, strategy: RecoverStrategy) -> Self {
130 self.too_long_encoded_word = strategy;
131 self
132 }
133
134 /// Decodes the given RFC 2047 MIME Message Header encoded string.
135 pub fn decode<T: AsRef<[u8]>>(self, encoded_str: T) -> Result<String, Error> {
136 let text_tokens = lexer::run(encoded_str.as_ref(), self.too_long_encoded_word)?;
137 let parsed_text = parser::run(text_tokens)?;
138 let evaluated_string = evaluator::run(parsed_text)?;
139
140 Ok(evaluated_string)
141 }
142}
143
144impl Default for Decoder {
145 /// Returns the decoder with the following default "settings":
146 ///
147 /// - `too_long_encoded_word`: [RecoverStrategy::Abort]
148 fn default() -> Self {
149 Self {
150 too_long_encoded_word: RecoverStrategy::Abort,
151 }
152 }
153}
154
#[cfg(test)]
mod tests {
    /// The main tests. They follow the table of examples in RFC 2047,
    /// section 8: <https://datatracker.ietf.org/doc/html/rfc2047#section-8>
    mod rfc_tests {
        use crate::decode;

        #[test]
        fn decode_encoded_word_single_char() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?=").unwrap(), "a");
        }

        #[test]
        fn decode_encoded_word_separated_by_whitespace() {
            assert_eq!(decode("=?ISO-8859-1?Q?a?= b").unwrap(), "a b");
        }

        #[test]
        fn decode_two_encoded_chars() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        #[test]
        fn whitespace_between_two_encoded_words_should_be_ignored() {
            // Two spaces on purpose: with a single space this test would be an
            // exact copy of `decode_two_encoded_chars`. The RFC table lists the
            // multiple-space case as a separate example.
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        #[test]
        fn whitespace_chars_between_two_encoded_words_should_be_ignored() {
            // Line break followed by spaces, written with escapes so the input
            // does not depend on how this source file is indented.
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=").unwrap(),
                "ab"
            );
        }

        #[test]
        fn whitespace_encoded_in_encoded_word() {
            assert_eq!(decode("=?ISO-8859-1?Q?a_b?=").unwrap(), "a b");
        }

        #[test]
        fn ignore_whitespace_between_two_encoded_words_but_not_the_encoded_whitespace() {
            assert_eq!(
                decode("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=").unwrap(),
                "a b"
            );
        }
    }

    /// Additional tests which are not taken from the RFC.
    mod custom_tests {
        use crate::decode;

        #[test]
        fn clear_empty() {
            assert_eq!(decode("").unwrap(), "");
        }

        #[test]
        fn clear_with_spaces() {
            assert_eq!(decode("str with spaces").unwrap(), "str with spaces");
        }

        #[test]
        fn utf8_qs_empty() {
            // An encoded word with empty Q-encoded text, mirroring
            // `utf8_b64_empty`. Decoding "" is already covered by `clear_empty`.
            assert_eq!(decode("=?UTF-8?Q??=").unwrap(), "");
        }

        #[test]
        fn utf8_qs_with_str() {
            assert_eq!(decode("=?UTF-8?Q?str?=").unwrap(), "str");
        }

        #[test]
        fn utf8_qs_with_spaces() {
            assert_eq!(
                decode("=?utf8?q?str_with_spaces?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_qs_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?q?str_with_special_=C3=A7h=C3=A0r=C3=9F?=").unwrap(),
                "str with special çhàrß"
            );
        }

        #[test]
        fn utf8_qs_double() {
            assert_eq!(
                decode("=?UTF-8?Q?str?=\r\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(
                decode("=?UTF-8?Q?str?=\n =?UTF-8?Q?str?=").unwrap(),
                "strstr"
            );
            assert_eq!(decode("=?UTF-8?Q?str?= =?UTF-8?Q?str?=").unwrap(), "strstr");
            assert_eq!(decode("=?UTF-8?Q?str?==?UTF-8?Q?str?=").unwrap(), "strstr");
        }

        #[test]
        fn utf8_b64_empty() {
            assert_eq!(decode("=?UTF-8?B??=").unwrap(), "");
        }

        #[test]
        fn utf8_b64_with_str() {
            assert_eq!(decode("=?UTF-8?B?c3Ry?=").unwrap(), "str");
        }

        #[test]
        fn utf8_b64_with_spaces() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BhY2Vz?=").unwrap(),
                "str with spaces"
            );
        }

        #[test]
        fn utf8_b64_with_spec_chars() {
            assert_eq!(
                decode("=?utf8?b?c3RyIHdpdGggc3BlY2lhbCDDp2jDoHLDnw==?=").unwrap(),
                "str with special çhàrß"
            );
        }

        #[test]
        fn utf8_b64_trailing_bit() {
            assert_eq!(
                decode("=?utf-8?B?UG9ydGFsZSBIYWNraW5nVGVhbW==?=").unwrap(),
                "Portale HackingTeam",
            );
        }
    }
}
303}