Skip to main content

mail_parser/decoders/
quoted_printable.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::parsers::MessageStream;
10
/// Position of a quoted-printable decoder relative to an `=XX` escape.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum QuotedPrintableState {
    /// Not inside an escape sequence.
    None,
    /// An `=` has been read; a hex digit or a soft line break is expected.
    Eq,
    /// The first hex digit after `=` has been read; the second is expected.
    Hex1,
}
17
18pub fn quoted_printable_decode(bytes: &[u8]) -> Option<Vec<u8>> {
19    let mut buf = Vec::with_capacity(bytes.len());
20
21    let mut state = QuotedPrintableState::None;
22    let mut hex1 = 0;
23    let mut ws_count = 0;
24    let mut crlf = b"\n".as_ref();
25
26    for &ch in bytes {
27        match ch {
28            b'=' => {
29                if let QuotedPrintableState::None = state {
30                    state = QuotedPrintableState::Eq
31                } else {
32                    return None;
33                }
34            }
35            b'\n' => {
36                if QuotedPrintableState::Eq == state {
37                    state = QuotedPrintableState::None;
38                } else {
39                    if ws_count > 0 {
40                        buf.truncate(buf.len() - ws_count);
41                    }
42                    buf.extend_from_slice(crlf);
43                }
44                ws_count = 0;
45            }
46            b'\r' => {
47                crlf = b"\r\n".as_ref();
48            }
49            _ => match state {
50                QuotedPrintableState::None => {
51                    if ch.is_ascii_whitespace() {
52                        ws_count += 1;
53                    } else {
54                        ws_count = 0;
55                    }
56                    buf.push(ch);
57                }
58                QuotedPrintableState::Eq => {
59                    hex1 = HEX_MAP[ch as usize];
60
61                    if hex1 != -1 {
62                        state = QuotedPrintableState::Hex1;
63                    } else if !ch.is_ascii_whitespace() {
64                        return None;
65                    }
66                }
67                QuotedPrintableState::Hex1 => {
68                    let hex2 = HEX_MAP[ch as usize];
69
70                    state = QuotedPrintableState::None;
71                    if hex2 != -1 {
72                        buf.push(((hex1 as u8) << 4) | hex2 as u8);
73                        ws_count = 0;
74                    } else {
75                        return None;
76                    }
77                }
78            },
79        }
80    }
81
82    buf.into()
83}
84
85#[inline(always)]
86pub fn quoted_printable_decode_char(hex1: u8, hex2: u8) -> Option<u8> {
87    let hex1 = HEX_MAP[hex1 as usize];
88    let hex2 = HEX_MAP[hex2 as usize];
89
90    (hex1 != -1 && hex2 != -1).then_some(((hex1 as u8) << 4) | hex2 as u8)
91}
92
93impl<'x> MessageStream<'x> {
94    pub fn decode_quoted_printable_mime(&mut self, boundary: &[u8]) -> (usize, Cow<'x, [u8]>) {
95        let mut buf = Vec::with_capacity(128);
96
97        let mut state = QuotedPrintableState::None;
98        let mut hex1 = 0;
99        let mut last_ch = 0;
100        let mut before_last_ch = 0;
101        let mut ws_count = 0;
102        let mut end_pos = self.offset();
103        let mut crlf = b"\n".as_ref();
104
105        self.checkpoint();
106
107        while let Some(&ch) = self.next() {
108            match ch {
109                b'=' => {
110                    if let QuotedPrintableState::None = state {
111                        state = QuotedPrintableState::Eq
112                    } else {
113                        self.restore();
114                        return (usize::MAX, b""[..].into());
115                    }
116                }
117                b'\n' => {
118                    end_pos = if last_ch == b'\r' {
119                        self.offset() - 2
120                    } else {
121                        self.offset() - 1
122                    };
123                    if QuotedPrintableState::Eq == state {
124                        state = QuotedPrintableState::None;
125                    } else {
126                        if ws_count > 0 {
127                            buf.truncate(buf.len() - ws_count);
128                        }
129                        buf.extend_from_slice(crlf);
130                    }
131                    ws_count = 0;
132                }
133                b'\r' => {
134                    crlf = b"\r\n".as_ref();
135                }
136                b'-' if !boundary.is_empty() && last_ch == b'-' && self.try_skip(boundary) => {
137                    if before_last_ch == b'\n' {
138                        buf.truncate(buf.len() - (crlf.len() + 1));
139                    } else {
140                        buf.truncate(buf.len() - 1);
141                        end_pos = self.offset() - boundary.len() - 2;
142                    }
143
144                    return (end_pos, buf.into());
145                }
146                _ => match state {
147                    QuotedPrintableState::None => {
148                        if ch.is_ascii_whitespace() {
149                            ws_count += 1;
150                        } else {
151                            ws_count = 0;
152                        }
153                        buf.push(ch);
154                    }
155                    QuotedPrintableState::Eq => {
156                        hex1 = HEX_MAP[ch as usize];
157                        if hex1 != -1 {
158                            state = QuotedPrintableState::Hex1;
159                        } else if !ch.is_ascii_whitespace() {
160                            state = QuotedPrintableState::None;
161                            buf.push(b'=');
162                            buf.push(ch);
163                            ws_count = 0;
164                        }
165                    }
166                    QuotedPrintableState::Hex1 => {
167                        let hex2 = HEX_MAP[ch as usize];
168
169                        state = QuotedPrintableState::None;
170                        if hex2 != -1 {
171                            buf.push(((hex1 as u8) << 4) | hex2 as u8);
172                            ws_count = 0;
173                        } else {
174                            buf.push(b'=');
175                            buf.push(last_ch);
176                            buf.push(ch);
177                            ws_count = 0;
178                        }
179                    }
180                },
181            }
182
183            before_last_ch = last_ch;
184            last_ch = ch;
185        }
186
187        (
188            if boundary.is_empty() {
189                self.offset()
190            } else {
191                self.restore();
192                usize::MAX
193            },
194            buf.into(),
195        )
196    }
197
198    pub fn decode_quoted_printable_word(&mut self) -> Option<Vec<u8>> {
199        let mut buf = Vec::with_capacity(64);
200
201        let mut state = QuotedPrintableState::None;
202        let mut hex1 = 0;
203
204        while let Some(&ch) = self.next() {
205            match ch {
206                b'=' => {
207                    if let QuotedPrintableState::None = state {
208                        state = QuotedPrintableState::Eq
209                    } else {
210                        break;
211                    }
212                }
213                b'?' => {
214                    if let Some(b'=') = self.peek() {
215                        self.next();
216                        return buf.into();
217                    } else {
218                        buf.push(b'?');
219                    }
220                }
221                b'\n' => {
222                    if let Some(b' ' | b'\t') = self.peek() {
223                        loop {
224                            self.next();
225                            if !self.peek_next_is_space() {
226                                break;
227                            }
228                        }
229                    } else {
230                        break;
231                    }
232                }
233                b'_' => {
234                    buf.push(b' ');
235                }
236                b'\r' => (),
237                _ => match state {
238                    QuotedPrintableState::None => {
239                        buf.push(ch);
240                    }
241                    QuotedPrintableState::Eq => {
242                        hex1 = HEX_MAP[ch as usize];
243                        if hex1 != -1 {
244                            state = QuotedPrintableState::Hex1;
245                        } else {
246                            // Failed
247                            break;
248                        }
249                    }
250                    QuotedPrintableState::Hex1 => {
251                        let hex2 = HEX_MAP[ch as usize];
252                        state = QuotedPrintableState::None;
253                        if hex2 != -1 {
254                            buf.push(((hex1 as u8) << 4) | hex2 as u8);
255                        } else {
256                            // Failed
257                            break;
258                        }
259                    }
260                },
261            }
262        }
263
264        None
265    }
266}
267
268/*
269 * Adapted from Daniel Lemire's source:
270 * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2019/04/17/hexparse.cpp
271 *
272 */
273
/// Builds the 256-entry table behind `HEX_MAP`: the value of each ASCII hex
/// digit (`0-9`, `A-F`, `a-f`) at the index of its byte, `-1` everywhere else.
const fn build_hex_map() -> [i8; 256] {
    let mut table = [-1i8; 256];
    let mut value = 0usize;
    while value < 10 {
        table[b'0' as usize + value] = value as i8;
        value += 1;
    }
    while value < 16 {
        table[b'A' as usize + value - 10] = value as i8;
        table[b'a' as usize + value - 10] = value as i8;
        value += 1;
    }
    table
}

static HEX_TABLE: [i8; 256] = build_hex_map();

/// Maps every byte value to the hex digit it represents (0-15), or `-1` when
/// the byte is not an ASCII hex digit. Safe to index with any `u8`.
pub static HEX_MAP: &[i8] = &HEX_TABLE;
287
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Whole-buffer decoding: escapes, soft line breaks, trailing whitespace
    /// removal and line-ending handling.
    #[test]
    fn decode_quoted_printable() {
        let cases = [
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "—\u{2009}Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry",
                "—\u{2009}Antoine de Saint-Exupéry",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
            ("\n\n", "\n\n"),
        ];

        for (encoded_str, expected_result) in cases {
            let decoded =
                super::quoted_printable_decode(encoded_str.as_bytes()).unwrap_or_default();
            let decoded_text = String::from_utf8(decoded).unwrap();

            assert_eq!(decoded_text, expected_result, "Failed for {encoded_str:?}",);
        }
    }

    /// Stream decoding of a MIME part terminated by `--boundary`.
    #[test]
    fn decode_quoted_printable_mime() {
        let cases = [
            (
                "<meta content=\"text/html; charset=utf-8\"> h=C3=B6\n--boundary",
                "<meta content=\"text/html; charset=utf-8\"> hö",
            ),
            ("first=AZ second\n--boundary", "first=AZ second"),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry\n--",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry=\n--\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry--",
            ),
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)\r\n--boundary--"
                ),
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
                    "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
                    "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
                    "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
                    "—\u{2009}Antoine de Saint-Exupéry, Citadelle (1948)"
                ),
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "—\u{2009}Antoine de Saint-Exupéry\n--",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\r\n\r\n--boundary \n"
                ),
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
                    "sprach ein Redner, \"in steter Furcht vor Menschen und ",
                    "Tieren, eine Beute der Hunde, der\r\n"
                ),
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n\r\n--boundary"
                ),
                "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n",
            ),
        ];

        for (encoded_str, expected_result) in cases {
            let mut stream = MessageStream::new(encoded_str.as_bytes());
            let (bytes_read, result) = stream.decode_quoted_printable_mime(b"boundary");

            // `usize::MAX` signals that the boundary was not found.
            assert_ne!(bytes_read, usize::MAX);

            let decoded_text = std::str::from_utf8(result.as_ref()).unwrap();
            assert_eq!(decoded_text, expected_result, "Failed for {encoded_str:?}",);
        }
    }

    /// Encoded-word decoding; malformed input yields `None`, which is
    /// compared here as an empty result.
    #[test]
    fn decode_quoted_printable_word() {
        let cases = [
            ("this=20is=20some=20text?=", "this is some text"),
            ("this=20is=20\n  some=20text?=", "this is some text"),
            ("this is some text?=", "this is some text"),
            ("Keith_Moore?=", "Keith Moore"),
            ("=2=123?=", ""),
            ("= 20?=", ""),
            ("=====?=", ""),
            ("=20=20=XX?=", ""),
            ("=AX?=", ""),
            ("=\n=\n==?=", ""),
            ("=\r=1z?=", ""),
            ("=|?=", ""),
            ("????????=", "???????"),
            ("\n\n", ""),
        ];

        for (encoded_str, expected_result) in cases {
            let mut stream = MessageStream::new(encoded_str.as_bytes());
            let decoded = stream.decode_quoted_printable_word().unwrap_or_default();

            assert_eq!(
                decoded,
                expected_result.as_bytes(),
                "Failed for {encoded_str:?}",
            );
        }
    }
}