mail_parser/decoders/
quoted_printable.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::parsers::MessageStream;
10
/// Position of a quoted-printable decoder inside an `=XY` escape sequence.
///
/// The enum carries no data, so it is cheap to copy and supports full equality.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum QuotedPrintableState {
    /// Not inside an escape sequence.
    None,
    /// An `=` has been read; a hex digit or a soft line break is expected next.
    Eq,
    /// An `=` and the first hex digit have been read; the second digit is expected.
    Hex1,
}
17
18pub fn quoted_printable_decode(bytes: &[u8]) -> Option<Vec<u8>> {
19    let mut buf = Vec::with_capacity(bytes.len());
20
21    let mut state = QuotedPrintableState::None;
22    let mut hex1 = 0;
23    let mut ws_count = 0;
24    let mut crlf = b"\n".as_ref();
25
26    for &ch in bytes {
27        match ch {
28            b'=' => {
29                if let QuotedPrintableState::None = state {
30                    state = QuotedPrintableState::Eq
31                } else {
32                    return None;
33                }
34            }
35            b'\n' => {
36                if QuotedPrintableState::Eq == state {
37                    state = QuotedPrintableState::None;
38                } else {
39                    if ws_count > 0 {
40                        buf.truncate(buf.len() - ws_count);
41                    }
42                    buf.extend_from_slice(crlf);
43                }
44                ws_count = 0;
45            }
46            b'\r' => {
47                crlf = b"\r\n".as_ref();
48            }
49            _ => match state {
50                QuotedPrintableState::None => {
51                    if ch.is_ascii_whitespace() {
52                        ws_count += 1;
53                    } else {
54                        ws_count = 0;
55                    }
56                    buf.push(ch);
57                }
58                QuotedPrintableState::Eq => {
59                    hex1 = HEX_MAP[ch as usize];
60
61                    if hex1 != -1 {
62                        state = QuotedPrintableState::Hex1;
63                    } else if !ch.is_ascii_whitespace() {
64                        return None;
65                    }
66                }
67                QuotedPrintableState::Hex1 => {
68                    let hex2 = HEX_MAP[ch as usize];
69
70                    state = QuotedPrintableState::None;
71                    if hex2 != -1 {
72                        buf.push(((hex1 as u8) << 4) | hex2 as u8);
73                        ws_count = 0;
74                    } else {
75                        return None;
76                    }
77                }
78            },
79        }
80    }
81
82    buf.into()
83}
84
85#[inline(always)]
86pub fn quoted_printable_decode_char(hex1: u8, hex2: u8) -> Option<u8> {
87    let hex1 = HEX_MAP[hex1 as usize];
88    let hex2 = HEX_MAP[hex2 as usize];
89
90    (hex1 != -1 && hex2 != -1).then_some(((hex1 as u8) << 4) | hex2 as u8)
91}
92
93impl<'x> MessageStream<'x> {
94    pub fn decode_quoted_printable_mime(&mut self, boundary: &[u8]) -> (usize, Cow<'x, [u8]>) {
95        let mut buf = Vec::with_capacity(128);
96
97        let mut state = QuotedPrintableState::None;
98        let mut hex1 = 0;
99        let mut last_ch = 0;
100        let mut before_last_ch = 0;
101        let mut ws_count = 0;
102        let mut end_pos = self.offset();
103        let mut crlf = b"\n".as_ref();
104
105        self.checkpoint();
106
107        while let Some(&ch) = self.next() {
108            match ch {
109                b'=' => {
110                    if let QuotedPrintableState::None = state {
111                        state = QuotedPrintableState::Eq
112                    } else {
113                        self.restore();
114                        return (usize::MAX, b""[..].into());
115                    }
116                }
117                b'\n' => {
118                    end_pos = if last_ch == b'\r' {
119                        self.offset() - 2
120                    } else {
121                        self.offset() - 1
122                    };
123                    if QuotedPrintableState::Eq == state {
124                        state = QuotedPrintableState::None;
125                    } else {
126                        if ws_count > 0 {
127                            buf.truncate(buf.len() - ws_count);
128                        }
129                        buf.extend_from_slice(crlf);
130                    }
131                    ws_count = 0;
132                }
133                b'\r' => {
134                    crlf = b"\r\n".as_ref();
135                }
136                b'-' if !boundary.is_empty() && last_ch == b'-' && self.try_skip(boundary) => {
137                    if before_last_ch == b'\n' {
138                        buf.truncate(buf.len() - (crlf.len() + 1));
139                    } else {
140                        buf.truncate(buf.len() - 1);
141                        end_pos = self.offset() - boundary.len() - 2;
142                    }
143
144                    return (end_pos, buf.into());
145                }
146                _ => match state {
147                    QuotedPrintableState::None => {
148                        if ch.is_ascii_whitespace() {
149                            ws_count += 1;
150                        } else {
151                            ws_count = 0;
152                        }
153                        buf.push(ch);
154                    }
155                    QuotedPrintableState::Eq => {
156                        hex1 = HEX_MAP[ch as usize];
157                        if hex1 != -1 {
158                            state = QuotedPrintableState::Hex1;
159                        } else if !ch.is_ascii_whitespace() {
160                            self.restore();
161                            return (usize::MAX, b""[..].into());
162                        }
163                    }
164                    QuotedPrintableState::Hex1 => {
165                        let hex2 = HEX_MAP[ch as usize];
166
167                        state = QuotedPrintableState::None;
168                        if hex2 != -1 {
169                            buf.push(((hex1 as u8) << 4) | hex2 as u8);
170                            ws_count = 0;
171                        } else {
172                            self.restore();
173                            return (usize::MAX, b""[..].into());
174                        }
175                    }
176                },
177            }
178
179            before_last_ch = last_ch;
180            last_ch = ch;
181        }
182
183        (
184            if boundary.is_empty() {
185                self.offset()
186            } else {
187                self.restore();
188                usize::MAX
189            },
190            buf.into(),
191        )
192    }
193
194    pub fn decode_quoted_printable_word(&mut self) -> Option<Vec<u8>> {
195        let mut buf = Vec::with_capacity(64);
196
197        let mut state = QuotedPrintableState::None;
198        let mut hex1 = 0;
199
200        while let Some(&ch) = self.next() {
201            match ch {
202                b'=' => {
203                    if let QuotedPrintableState::None = state {
204                        state = QuotedPrintableState::Eq
205                    } else {
206                        break;
207                    }
208                }
209                b'?' => {
210                    if let Some(b'=') = self.peek() {
211                        self.next();
212                        return buf.into();
213                    } else {
214                        buf.push(b'?');
215                    }
216                }
217                b'\n' => {
218                    if let Some(b' ' | b'\t') = self.peek() {
219                        loop {
220                            self.next();
221                            if !self.peek_next_is_space() {
222                                break;
223                            }
224                        }
225                    } else {
226                        break;
227                    }
228                }
229                b'_' => {
230                    buf.push(b' ');
231                }
232                b'\r' => (),
233                _ => match state {
234                    QuotedPrintableState::None => {
235                        buf.push(ch);
236                    }
237                    QuotedPrintableState::Eq => {
238                        hex1 = HEX_MAP[ch as usize];
239                        if hex1 != -1 {
240                            state = QuotedPrintableState::Hex1;
241                        } else {
242                            // Failed
243                            break;
244                        }
245                    }
246                    QuotedPrintableState::Hex1 => {
247                        let hex2 = HEX_MAP[ch as usize];
248                        state = QuotedPrintableState::None;
249                        if hex2 != -1 {
250                            buf.push(((hex1 as u8) << 4) | hex2 as u8);
251                        } else {
252                            // Failed
253                            break;
254                        }
255                    }
256                },
257            }
258        }
259
260        None
261    }
262}
263
264/*
265 * Adapted from Daniel Lemire's source:
266 * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/2019/04/17/hexparse.cpp
267 *
268 */
269
/// Builds the hex-digit lookup table at compile time.
///
/// Every entry is `-1` except those indexed by `b'0'..=b'9'` (values 0-9) and
/// by `b'A'..=b'F'` / `b'a'..=b'f'` (values 10-15).
const fn build_hex_table() -> [i8; 256] {
    let mut table = [-1i8; 256];

    let mut digit = 0;
    while digit < 10 {
        table[b'0' as usize + digit] = digit as i8;
        digit += 1;
    }

    let mut letter = 0;
    while letter < 6 {
        table[b'A' as usize + letter] = 10 + letter as i8;
        table[b'a' as usize + letter] = 10 + letter as i8;
        letter += 1;
    }

    table
}

const HEX_TABLE: [i8; 256] = build_hex_table();

/// Maps a byte value to the numeric value of the ASCII hex digit it represents,
/// or to `-1` when the byte is not a hex digit. The table has 256 entries, so
/// it can be indexed with any `u8`.
pub static HEX_MAP: &[i8] = &HEX_TABLE;
283
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Decoded attribution line. The em dash (U+2014) and the thin space
    /// (U+2009) are written as escapes so the invisible thin space stays visible.
    const ATTRIBUTION: &str = "\u{2014}\u{2009}Antoine de Saint-Exupéry";

    /// Decoded form of the "Citadelle" fixture (soft line breaks removed).
    const CITADELLE: &str = concat!(
        "J'interdis aux marchands de vanter trop leurs marchandises. ",
        "Car ils se fontvite pédagogues et t'enseignent comme but ce qui ",
        "n'est par essence qu'un moyen, et te trompant ainsi sur la route ",
        "à suivre les voilà bientôt qui te dégradent, car si leur musique ",
        "est vulgaire ils te fabriquent pour te la vendre une âme vulgaire.\n",
        "\u{2014}\u{2009}Antoine de Saint-Exupéry, Citadelle (1948)"
    );

    /// Decoded form of the CRLF fixture with soft line breaks.
    const HASEN: &str = concat!(
        "Die Hasen klagten einst uber ihre Lage; \"wir leben\", ",
        "sprach ein Redner, \"in steter Furcht vor Menschen und ",
        "Tieren, eine Beute der Hunde, der\r\n"
    );

    /// Decoded form of the trailing-whitespace fixture.
    const WHITESPACE: &str =
        "hello\r\nbar\r\nfoo\tbar\r\nfoo\t \tb\r\nfoo bar\r\nfoo b\r\nfoo\r\nbar\r\nfoo_bar\r\n";

    /// Whole-buffer decoding through `quoted_printable_decode`.
    #[test]
    fn decode_quoted_printable() {
        let cases = [
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)"
                ),
                CITADELLE,
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry",
                ATTRIBUTION,
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\n"
                ),
                HASEN,
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n"
                ),
                WHITESPACE,
            ),
            ("\n\n", "\n\n"),
        ];

        for (encoded_str, expected) in cases {
            let decoded = super::quoted_printable_decode(encoded_str.as_bytes());
            let decoded = String::from_utf8(decoded.unwrap_or_default()).unwrap();

            assert_eq!(decoded, expected, "Failed for {encoded_str:?}");
        }
    }

    /// Streaming decode that stops at the `--boundary` delimiter.
    #[test]
    fn decode_quoted_printable_mime() {
        let cases = [
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--boundary",
                ATTRIBUTION,
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "\u{2014}\u{2009}Antoine de Saint-Exupéry\n--",
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry=\n--\n--boundary",
                "\u{2014}\u{2009}Antoine de Saint-Exupéry--",
            ),
            (
                concat!(
                    "J'interdis aux marchands de vanter trop leurs marchandises. ",
                    "Car ils se font=\nvite p=C3=A9dagogues et t'enseignent comme but ce ",
                    "qui n'est par essence qu=\n'un moyen, et te trompant ainsi sur la route ",
                    "=C3=A0 suivre les voil=C3=\n=A0 bient=C3=B4t qui te d=C3=A9gradent, car ",
                    "si leur musique est vulgaire il=\ns te fabriquent pour te la vendre une ",
                    "=C3=A2me vulgaire.\n=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry, ",
                    "Citadelle (1948)\r\n--boundary--"
                ),
                CITADELLE,
            ),
            (
                "=E2=80=94=E2=80=89Antoine de Saint-Exup=C3=A9ry\n--\n--boundary",
                "\u{2014}\u{2009}Antoine de Saint-Exupéry\n--",
            ),
            (
                concat!(
                    "Die Hasen klagten einst uber ihre Lage; \"wir ",
                    "leben\", sprach ein=\r\n Redner, \"in steter Furcht vor Menschen",
                    " und Tieren, eine Beute der Hunde,=\r\n der\r\n\r\n--boundary \n"
                ),
                HASEN,
            ),
            (
                concat!(
                    "hello  \r\nbar=\r\n\r\nfoo\t=\r\nbar\r\nfoo\t \t= \r\n=62\r\nfoo = ",
                    "\t\r\nbar\r\nfoo =\r\n=62\r\nfoo  \r\nbar=\r\n\r\nfoo_bar\r\n\r\n--boundary"
                ),
                WHITESPACE,
            ),
        ];

        for (encoded_str, expected) in cases {
            let mut stream = MessageStream::new(encoded_str.as_bytes());
            let (bytes_read, decoded) = stream.decode_quoted_printable_mime(b"boundary");

            assert_ne!(bytes_read, usize::MAX);
            assert_eq!(
                std::str::from_utf8(decoded.as_ref()).unwrap(),
                expected,
                "Failed for {encoded_str:?}",
            );
        }
    }

    /// Encoded-word decoding; a failed decode is compared as an empty result.
    #[test]
    fn decode_quoted_printable_word() {
        let cases = [
            ("this=20is=20some=20text?=", "this is some text"),
            ("this=20is=20\n  some=20text?=", "this is some text"),
            ("this is some text?=", "this is some text"),
            ("Keith_Moore?=", "Keith Moore"),
            ("=2=123?=", ""),
            ("= 20?=", ""),
            ("=====?=", ""),
            ("=20=20=XX?=", ""),
            ("=AX?=", ""),
            ("=\n=\n==?=", ""),
            ("=\r=1z?=", ""),
            ("=|?=", ""),
            ("????????=", "???????"),
            ("\n\n", ""),
        ];

        for (encoded_str, expected) in cases {
            let mut stream = MessageStream::new(encoded_str.as_bytes());
            let decoded = stream.decode_quoted_printable_word().unwrap_or_default();

            assert_eq!(decoded, expected.as_bytes(), "Failed for {encoded_str:?}");
        }
    }
}