1use std::borrow::Cow;
8
9use crate::parsers::MessageStream;
10
/// Position of the decoder inside a Quoted-Printable escape sequence.
///
/// The enum carries no data, so it is cheap to copy and compare.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum QuotedPrintableState {
    /// Not inside an escape sequence.
    None,
    /// An `=` has been read; a hex digit or a line break is expected next.
    Eq,
    /// The first hex digit after `=` has been read; the second one is expected.
    Hex1,
}
17
18pub fn quoted_printable_decode(bytes: &[u8]) -> Option<Vec<u8>> {
19 let mut buf = Vec::with_capacity(bytes.len());
20
21 let mut state = QuotedPrintableState::None;
22 let mut hex1 = 0;
23 let mut ws_count = 0;
24 let mut crlf = b"\n".as_ref();
25
26 for &ch in bytes {
27 match ch {
28 b'=' => {
29 if let QuotedPrintableState::None = state {
30 state = QuotedPrintableState::Eq
31 } else {
32 return None;
33 }
34 }
35 b'\n' => {
36 if QuotedPrintableState::Eq == state {
37 state = QuotedPrintableState::None;
38 } else {
39 if ws_count > 0 {
40 buf.truncate(buf.len() - ws_count);
41 }
42 buf.extend_from_slice(crlf);
43 }
44 ws_count = 0;
45 }
46 b'\r' => {
47 crlf = b"\r\n".as_ref();
48 }
49 _ => match state {
50 QuotedPrintableState::None => {
51 if ch.is_ascii_whitespace() {
52 ws_count += 1;
53 } else {
54 ws_count = 0;
55 }
56 buf.push(ch);
57 }
58 QuotedPrintableState::Eq => {
59 hex1 = HEX_MAP[ch as usize];
60
61 if hex1 != -1 {
62 state = QuotedPrintableState::Hex1;
63 } else if !ch.is_ascii_whitespace() {
64 return None;
65 }
66 }
67 QuotedPrintableState::Hex1 => {
68 let hex2 = HEX_MAP[ch as usize];
69
70 state = QuotedPrintableState::None;
71 if hex2 != -1 {
72 buf.push(((hex1 as u8) << 4) | hex2 as u8);
73 ws_count = 0;
74 } else {
75 return None;
76 }
77 }
78 },
79 }
80 }
81
82 buf.into()
83}
84
85#[inline(always)]
86pub fn quoted_printable_decode_char(hex1: u8, hex2: u8) -> Option<u8> {
87 let hex1 = HEX_MAP[hex1 as usize];
88 let hex2 = HEX_MAP[hex2 as usize];
89
90 (hex1 != -1 && hex2 != -1).then_some(((hex1 as u8) << 4) | hex2 as u8)
91}
92
93impl<'x> MessageStream<'x> {
94 pub fn decode_quoted_printable_mime(&mut self, boundary: &[u8]) -> (usize, Cow<'x, [u8]>) {
95 let mut buf = Vec::with_capacity(128);
96
97 let mut state = QuotedPrintableState::None;
98 let mut hex1 = 0;
99 let mut last_ch = 0;
100 let mut before_last_ch = 0;
101 let mut ws_count = 0;
102 let mut end_pos = self.offset();
103 let mut crlf = b"\n".as_ref();
104
105 self.checkpoint();
106
107 while let Some(&ch) = self.next() {
108 match ch {
109 b'=' => {
110 if let QuotedPrintableState::None = state {
111 state = QuotedPrintableState::Eq
112 } else {
113 self.restore();
114 return (usize::MAX, b""[..].into());
115 }
116 }
117 b'\n' => {
118 end_pos = if last_ch == b'\r' {
119 self.offset() - 2
120 } else {
121 self.offset() - 1
122 };
123 if QuotedPrintableState::Eq == state {
124 state = QuotedPrintableState::None;
125 } else {
126 if ws_count > 0 {
127 buf.truncate(buf.len() - ws_count);
128 }
129 buf.extend_from_slice(crlf);
130 }
131 ws_count = 0;
132 }
133 b'\r' => {
134 crlf = b"\r\n".as_ref();
135 }
136 b'-' if !boundary.is_empty() && last_ch == b'-' && self.try_skip(boundary) => {
137 if before_last_ch == b'\n' {
138 buf.truncate(buf.len() - (crlf.len() + 1));
139 } else {
140 buf.truncate(buf.len() - 1);
141 end_pos = self.offset() - boundary.len() - 2;
142 }
143
144 return (end_pos, buf.into());
145 }
146 _ => match state {
147 QuotedPrintableState::None => {
148 if ch.is_ascii_whitespace() {
149 ws_count += 1;
150 } else {
151 ws_count = 0;
152 }
153 buf.push(ch);
154 }
155 QuotedPrintableState::Eq => {
156 hex1 = HEX_MAP[ch as usize];
157 if hex1 != -1 {
158 state = QuotedPrintableState::Hex1;
159 } else if !ch.is_ascii_whitespace() {
160 self.restore();
161 return (usize::MAX, b""[..].into());
162 }
163 }
164 QuotedPrintableState::Hex1 => {
165 let hex2 = HEX_MAP[ch as usize];
166
167 state = QuotedPrintableState::None;
168 if hex2 != -1 {
169 buf.push(((hex1 as u8) << 4) | hex2 as u8);
170 ws_count = 0;
171 } else {
172 self.restore();
173 return (usize::MAX, b""[..].into());
174 }
175 }
176 },
177 }
178
179 before_last_ch = last_ch;
180 last_ch = ch;
181 }
182
183 (
184 if boundary.is_empty() {
185 self.offset()
186 } else {
187 self.restore();
188 usize::MAX
189 },
190 buf.into(),
191 )
192 }
193
194 pub fn decode_quoted_printable_word(&mut self) -> Option<Vec<u8>> {
195 let mut buf = Vec::with_capacity(64);
196
197 let mut state = QuotedPrintableState::None;
198 let mut hex1 = 0;
199
200 while let Some(&ch) = self.next() {
201 match ch {
202 b'=' => {
203 if let QuotedPrintableState::None = state {
204 state = QuotedPrintableState::Eq
205 } else {
206 break;
207 }
208 }
209 b'?' => {
210 if let Some(b'=') = self.peek() {
211 self.next();
212 return buf.into();
213 } else {
214 buf.push(b'?');
215 }
216 }
217 b'\n' => {
218 if let Some(b' ' | b'\t') = self.peek() {
219 loop {
220 self.next();
221 if !self.peek_next_is_space() {
222 break;
223 }
224 }
225 } else {
226 break;
227 }
228 }
229 b'_' => {
230 buf.push(b' ');
231 }
232 b'\r' => (),
233 _ => match state {
234 QuotedPrintableState::None => {
235 buf.push(ch);
236 }
237 QuotedPrintableState::Eq => {
238 hex1 = HEX_MAP[ch as usize];
239 if hex1 != -1 {
240 state = QuotedPrintableState::Hex1;
241 } else {
242 break;
244 }
245 }
246 QuotedPrintableState::Hex1 => {
247 let hex2 = HEX_MAP[ch as usize];
248 state = QuotedPrintableState::None;
249 if hex2 != -1 {
250 buf.push(((hex1 as u8) << 4) | hex2 as u8);
251 } else {
252 break;
254 }
255 }
256 },
257 }
258 }
259
260 None
261 }
262}
263
/// Builds the 256-entry table behind [`HEX_MAP`] at compile time.
const fn build_hex_map() -> [i8; 256] {
    let mut table = [-1i8; 256];

    let mut digit = 0u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit as i8;
        digit += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        table[(b'A' + letter) as usize] = (10 + letter) as i8;
        table[(b'a' + letter) as usize] = (10 + letter) as i8;
        letter += 1;
    }

    table
}

static HEX_MAP_TABLE: [i8; 256] = build_hex_map();

/// Maps every byte value to the value of the hexadecimal digit it represents.
///
/// `b'0'..=b'9'` map to 0–9, `b'A'..=b'F'` and `b'a'..=b'f'` map to 10–15, and every other
/// byte maps to -1. The table has 256 entries, so any `u8` can be used as an index.
pub static HEX_MAP: &[i8] = &HEX_MAP_TABLE;
283
284#[cfg(test)]
285mod tests {
286 use crate::parsers::MessageStream;
287
    // Table-driven check of `super::quoted_printable_decode`. Each tuple is
    // (encoded input, expected decoded text). The cases cover hex escapes, soft line
    // breaks, removal of whitespace before hard line breaks, and `\n` versus `\r\n` output.
    #[test]
    fn decode_quoted_printable() {
        for (encoded_str, expected_result) in [
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "— Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry",
                "— Antoine de Saint-Exupéry",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo \r\nbar=\r\n\r\nfoo_bar\r\n"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
            ("\n\n", "\n\n"),
        ] {
            // A `None` result is turned into an empty vector by `unwrap_or_default`, so a
            // rejected input shows up as a mismatch against the expected text.
            assert_eq!(
                String::from_utf8(super::quoted_printable_decode(encoded_str.as_bytes()).unwrap_or_default()).unwrap(),
                expected_result,
                "Failed for {encoded_str:?}",
            );
        }
    }
342
    // Table-driven check of `MessageStream::decode_quoted_printable_mime` with the boundary
    // `boundary`. Each tuple is (encoded part body followed by `--boundary`, expected
    // decoded text). The expected text never includes the line break that directly precedes
    // the delimiter.
    #[test]
    fn decode_quoted_printable_mime() {
        for (encoded_str, expected_result) in [
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--boundary",
                "— Antoine de Saint-Exupéry",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "— Antoine de Saint-Exupéry\n--",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry=\n--\n--boundary",
                "— Antoine de Saint-Exupéry--",
            ),
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)\r\n--boundary--"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "— Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "— Antoine de Saint-Exupéry\n--",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\r\n\r\n--boundary \n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo \r\nbar=\r\n\r\nfoo_bar\r\n\r\n--boundary"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
        ] {
            let mut s = MessageStream::new(encoded_str.as_bytes());
            let (bytes_read, result) = s.decode_quoted_printable_mime(b"boundary");
            // `usize::MAX` is the value the decoder returns when it fails.
            assert_ne!(bytes_read, usize::MAX);
            assert_eq!(
                std::str::from_utf8(result.as_ref()).unwrap(),
                expected_result,
                "Failed for {encoded_str:?}",
            );
        }
    }
411
    // Table-driven check of `MessageStream::decode_quoted_printable_word`. Each tuple is
    // (encoded word text, expected decoded text). Inputs the decoder rejects are expected
    // to produce an empty result, because `None` is mapped to an empty vector below.
    #[test]
    fn decode_quoted_printable_word() {
        for (encoded_str, expected_result) in [
            ("this=20is=20some=20text?=", "this is some text"),
            ("this=20is=20\n some=20text?=", "this is some text"),
            ("this is some text?=", "this is some text"),
            ("Keith_Moore?=", "Keith Moore"),
            ("=2=123?=", ""),
            ("= 20?=", ""),
            ("=====?=", ""),
            ("=20=20=XX?=", ""),
            ("=AX?=", ""),
            ("=\n=\n==?=", ""),
            ("=\r=1z?=", ""),
            ("=|?=", ""),
            ("????????=", "???????"),
            ("\n\n", ""),
        ] {
            let mut s = MessageStream::new(encoded_str.as_bytes());

            assert_eq!(
                s.decode_quoted_printable_word().unwrap_or_default(),
                expected_result.as_bytes(),
                "Failed for {encoded_str:?}",
            );
        }
    }
439}