1use std::borrow::Cow;
8
9use crate::parsers::MessageStream;
10
/// Position of a quoted-printable decoder inside an `=XY` escape sequence.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum QuotedPrintableState {
    /// Not inside an escape; bytes are taken literally.
    None,
    /// An `=` has been read; a hex digit or a soft line break comes next.
    Eq,
    /// `=` and the first hex digit have been read; the second digit comes next.
    Hex1,
}
17
18pub fn quoted_printable_decode(bytes: &[u8]) -> Option<Vec<u8>> {
19 let mut buf = Vec::with_capacity(bytes.len());
20
21 let mut state = QuotedPrintableState::None;
22 let mut hex1 = 0;
23 let mut ws_count = 0;
24 let mut crlf = b"\n".as_ref();
25
26 for &ch in bytes {
27 match ch {
28 b'=' => {
29 if let QuotedPrintableState::None = state {
30 state = QuotedPrintableState::Eq
31 } else {
32 return None;
33 }
34 }
35 b'\n' => {
36 if QuotedPrintableState::Eq == state {
37 state = QuotedPrintableState::None;
38 } else {
39 if ws_count > 0 {
40 buf.truncate(buf.len() - ws_count);
41 }
42 buf.extend_from_slice(crlf);
43 }
44 ws_count = 0;
45 }
46 b'\r' => {
47 crlf = b"\r\n".as_ref();
48 }
49 _ => match state {
50 QuotedPrintableState::None => {
51 if ch.is_ascii_whitespace() {
52 ws_count += 1;
53 } else {
54 ws_count = 0;
55 }
56 buf.push(ch);
57 }
58 QuotedPrintableState::Eq => {
59 hex1 = HEX_MAP[ch as usize];
60
61 if hex1 != -1 {
62 state = QuotedPrintableState::Hex1;
63 } else if !ch.is_ascii_whitespace() {
64 return None;
65 }
66 }
67 QuotedPrintableState::Hex1 => {
68 let hex2 = HEX_MAP[ch as usize];
69
70 state = QuotedPrintableState::None;
71 if hex2 != -1 {
72 buf.push(((hex1 as u8) << 4) | hex2 as u8);
73 ws_count = 0;
74 } else {
75 return None;
76 }
77 }
78 },
79 }
80 }
81
82 buf.into()
83}
84
85#[inline(always)]
86pub fn quoted_printable_decode_char(hex1: u8, hex2: u8) -> Option<u8> {
87 let hex1 = HEX_MAP[hex1 as usize];
88 let hex2 = HEX_MAP[hex2 as usize];
89
90 (hex1 != -1 && hex2 != -1).then_some(((hex1 as u8) << 4) | hex2 as u8)
91}
92
93impl<'x> MessageStream<'x> {
94 pub fn decode_quoted_printable_mime(&mut self, boundary: &[u8]) -> (usize, Cow<'x, [u8]>) {
95 let mut buf = Vec::with_capacity(128);
96
97 let mut state = QuotedPrintableState::None;
98 let mut hex1 = 0;
99 let mut last_ch = 0;
100 let mut before_last_ch = 0;
101 let mut ws_count = 0;
102 let mut end_pos = self.offset();
103 let mut crlf = b"\n".as_ref();
104
105 self.checkpoint();
106
107 while let Some(&ch) = self.next() {
108 match ch {
109 b'=' => {
110 if let QuotedPrintableState::None = state {
111 state = QuotedPrintableState::Eq
112 } else {
113 self.restore();
114 return (usize::MAX, b""[..].into());
115 }
116 }
117 b'\n' => {
118 end_pos = if last_ch == b'\r' {
119 self.offset() - 2
120 } else {
121 self.offset() - 1
122 };
123 if QuotedPrintableState::Eq == state {
124 state = QuotedPrintableState::None;
125 } else {
126 if ws_count > 0 {
127 buf.truncate(buf.len() - ws_count);
128 }
129 buf.extend_from_slice(crlf);
130 }
131 ws_count = 0;
132 }
133 b'\r' => {
134 crlf = b"\r\n".as_ref();
135 }
136 b'-' if !boundary.is_empty() && last_ch == b'-' && self.try_skip(boundary) => {
137 if before_last_ch == b'\n' {
138 buf.truncate(buf.len() - (crlf.len() + 1));
139 } else {
140 buf.truncate(buf.len() - 1);
141 end_pos = self.offset() - boundary.len() - 2;
142 }
143
144 return (end_pos, buf.into());
145 }
146 _ => match state {
147 QuotedPrintableState::None => {
148 if ch.is_ascii_whitespace() {
149 ws_count += 1;
150 } else {
151 ws_count = 0;
152 }
153 buf.push(ch);
154 }
155 QuotedPrintableState::Eq => {
156 hex1 = HEX_MAP[ch as usize];
157 if hex1 != -1 {
158 state = QuotedPrintableState::Hex1;
159 } else if !ch.is_ascii_whitespace() {
160 state = QuotedPrintableState::None;
161 buf.push(b'=');
162 buf.push(ch);
163 ws_count = 0;
164 }
165 }
166 QuotedPrintableState::Hex1 => {
167 let hex2 = HEX_MAP[ch as usize];
168
169 state = QuotedPrintableState::None;
170 if hex2 != -1 {
171 buf.push(((hex1 as u8) << 4) | hex2 as u8);
172 ws_count = 0;
173 } else {
174 buf.push(b'=');
175 buf.push(last_ch);
176 buf.push(ch);
177 ws_count = 0;
178 }
179 }
180 },
181 }
182
183 before_last_ch = last_ch;
184 last_ch = ch;
185 }
186
187 (
188 if boundary.is_empty() {
189 self.offset()
190 } else {
191 self.restore();
192 usize::MAX
193 },
194 buf.into(),
195 )
196 }
197
198 pub fn decode_quoted_printable_word(&mut self) -> Option<Vec<u8>> {
199 let mut buf = Vec::with_capacity(64);
200
201 let mut state = QuotedPrintableState::None;
202 let mut hex1 = 0;
203
204 while let Some(&ch) = self.next() {
205 match ch {
206 b'=' => {
207 if let QuotedPrintableState::None = state {
208 state = QuotedPrintableState::Eq
209 } else {
210 break;
211 }
212 }
213 b'?' => {
214 if let Some(b'=') = self.peek() {
215 self.next();
216 return buf.into();
217 } else {
218 buf.push(b'?');
219 }
220 }
221 b'\n' => {
222 if let Some(b' ' | b'\t') = self.peek() {
223 loop {
224 self.next();
225 if !self.peek_next_is_space() {
226 break;
227 }
228 }
229 } else {
230 break;
231 }
232 }
233 b'_' => {
234 buf.push(b' ');
235 }
236 b'\r' => (),
237 _ => match state {
238 QuotedPrintableState::None => {
239 buf.push(ch);
240 }
241 QuotedPrintableState::Eq => {
242 hex1 = HEX_MAP[ch as usize];
243 if hex1 != -1 {
244 state = QuotedPrintableState::Hex1;
245 } else {
246 break;
248 }
249 }
250 QuotedPrintableState::Hex1 => {
251 let hex2 = HEX_MAP[ch as usize];
252 state = QuotedPrintableState::None;
253 if hex2 != -1 {
254 buf.push(((hex1 as u8) << 4) | hex2 as u8);
255 } else {
256 break;
258 }
259 }
260 },
261 }
262 }
263
264 None
265 }
266}
267
/// Lookup table from a byte to the value of the hexadecimal digit it spells.
///
/// `b'0'..=b'9'` map to 0-9, `b'A'..=b'F'` and `b'a'..=b'f'` map to 10-15 and
/// every other byte maps to `-1`. The table has 256 entries, one per possible
/// `u8` value; each row below covers 16 consecutive byte values.
#[rustfmt::skip]
pub static HEX_MAP: &[i8] = &[
    // 0x00 - 0x2F: control characters and punctuation
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3F: '0'..='9'
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x5F: 'A'..='F' start at 0x41
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x7F: 'a'..='f' start at 0x61
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x80 - 0xFF: non-ASCII bytes
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
];
287
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    // U+2009 THIN SPACE is spelled `\u{2009}` in the expected strings below so
    // that it cannot be mistaken for an ordinary space.

    /// Strict decoder: every input is well-formed and has to decode.
    #[test]
    fn decode_quoted_printable() {
        for (encoded_str, expected_result) in [
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "—\u{2009}Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry",
                "—\u{2009}Antoine de Saint-Exupéry",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
            ("\n\n", "\n\n"),
        ] {
            // Fail loudly on `None` instead of comparing against an empty default.
            let decoded = super::quoted_printable_decode(encoded_str.as_bytes())
                .unwrap_or_else(|| panic!("Decoding failed for {encoded_str:?}"));

            assert_eq!(
                String::from_utf8(decoded).unwrap(),
                expected_result,
                "Failed for {encoded_str:?}",
            );
        }
    }

    /// Lenient MIME body decoder: every input ends with `--boundary`.
    #[test]
    fn decode_quoted_printable_mime() {
        for (encoded_str, expected_result) in [
            (
                "<meta content=\"text/html; charset=utf-8\"> h=C3=B6\n--boundary",
                "<meta content=\"text/html; charset=utf-8\"> hö",
            ),
            ("first=AZ second\n--boundary", "first=AZ second"),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry\n--",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry=\n--\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry--",
            ),
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)\r\n--boundary--"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "—\u{2009}Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\r\n\r\n--boundary \n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n\r\n--boundary"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
        ] {
            let mut s = MessageStream::new(encoded_str.as_bytes());
            let (bytes_read, result) = s.decode_quoted_printable_mime(b"boundary");
            assert_ne!(
                bytes_read,
                usize::MAX,
                "Boundary not found in {encoded_str:?}"
            );
            assert_eq!(
                std::str::from_utf8(result.as_ref()).unwrap(),
                expected_result,
                "Failed for {encoded_str:?}",
            );
        }
    }

    /// Q-encoded words: `None` marks input the decoder has to reject, which is
    /// not the same as successfully decoding an empty word.
    #[test]
    fn decode_quoted_printable_word() {
        for (encoded_str, expected_result) in [
            ("this=20is=20some=20text?=", Some("this is some text")),
            ("this=20is=20\n  some=20text?=", Some("this is some text")),
            ("this is some text?=", Some("this is some text")),
            ("Keith_Moore?=", Some("Keith Moore")),
            ("=2=123?=", None),
            ("= 20?=", None),
            ("=====?=", None),
            ("=20=20=XX?=", None),
            ("=AX?=", None),
            ("=\n=\n==?=", None),
            ("=\r=1z?=", None),
            ("=|?=", None),
            ("????????=", Some("???????")),
            ("\n\n", None),
        ] {
            let mut s = MessageStream::new(encoded_str.as_bytes());

            assert_eq!(
                s.decode_quoted_printable_word().as_deref(),
                expected_result.map(str::as_bytes),
                "Failed for {encoded_str:?}",
            );
        }
    }
}