mail_parser/decoders/
encoded_word.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use crate::{decoders::charsets::map::charset_decoder, parsers::MessageStream};
8
9use super::DecodeWordFnc;
10
/// Position of the header scanner inside the `?charset?encoding?` prefix of
/// an encoded word, as tracked by `MessageStream::decode_rfc2047`.
enum Rfc2047State {
    /// Nothing consumed yet; the next byte has to be the opening `?`.
    Init,
    /// Reading the charset label, up to the `?` that terminates it.
    Charset,
    /// Expecting the single encoding letter (`q`/`Q` or `b`/`B`).
    Encoding,
    /// Expecting the `?` that separates the encoding letter from the payload.
    Data,
}
17
18impl MessageStream<'_> {
19    pub fn decode_rfc2047(&mut self) -> Option<String> {
20        let mut state = Rfc2047State::Init;
21
22        let mut charset_start = 0;
23        let mut charset_end = 0;
24        let mut decode_fnc: Option<DecodeWordFnc<'_>> = None;
25
26        while let Some(ch) = self.next() {
27            match state {
28                Rfc2047State::Init => {
29                    if ch != &b'?' {
30                        return None;
31                    }
32                    state = Rfc2047State::Charset;
33                    charset_start = self.offset();
34                    charset_end = self.offset();
35                }
36                Rfc2047State::Charset => match ch {
37                    b'?' => {
38                        if charset_end == charset_start {
39                            charset_end = self.offset() - 1;
40                        }
41                        if (charset_end - charset_start) < 2 {
42                            return None;
43                        }
44                        state = Rfc2047State::Encoding;
45                    }
46                    b'*' => {
47                        if charset_end == charset_start {
48                            charset_end = self.offset() - 1;
49                        }
50                    }
51                    b'\n' => {
52                        return None;
53                    }
54                    _ => (),
55                },
56                Rfc2047State::Encoding => {
57                    match ch {
58                        b'q' | b'Q' => {
59                            decode_fnc = Some(MessageStream::decode_quoted_printable_word)
60                        }
61                        b'b' | b'B' => decode_fnc = Some(MessageStream::decode_base64_word),
62                        _ => {
63                            return None;
64                        }
65                    }
66                    state = Rfc2047State::Data;
67                }
68                Rfc2047State::Data => {
69                    if ch != &b'?' {
70                        return None;
71                    } else {
72                        break;
73                    }
74                }
75            }
76        }
77
78        if let Some(bytes) = decode_fnc.and_then(|fnc| fnc(self)) {
79            if let Some(decoder) = charset_decoder(self.bytes(charset_start..charset_end)) {
80                decoder(&bytes).into()
81            } else {
82                String::from_utf8(bytes)
83                    .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
84                    .into()
85            }
86        } else {
87            None
88        }
89    }
90}
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Feeds a table of encoded words (without the leading `=`) through
    /// `decode_rfc2047` and checks the decoded text of each one.
    #[test]
    fn decode_rfc2047() {
        // Each entry is (input, expected text, flag); the flag is not
        // consulted by this test.
        let cases = [
            (
                "?iso-8859-1?q?this=20is=20some=20text?=",
                "this is some text",
                true,
            ),
            (
                "?iso-8859-1?q?this is some text?=",
                "this is some text",
                true,
            ),
            ("?US-ASCII?Q?Keith_Moore?=", "Keith Moore", false),
            (
                "?iso_8859-1:1987?Q?Keld_J=F8rn_Simonsen?=",
                "Keld Jørn Simonsen",
                true,
            ),
            (
                "?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
                "If you can read this yo",
                true,
            ),
            (
                "?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
                "u understand the example.",
                true,
            ),
            ("?ISO-8859-1?Q?Olle_J=E4rnefors?=", "Olle Järnefors", true),
            (
                "?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=",
                "Patrik Fältström",
                true,
            ),
            ("?ISO-8859-1*?Q?a?=", "a", true),
            ("?ISO-8859-1**?Q?a_b?=", "a b", true),
            (
                "?utf-8?b?VGjDrXMgw61zIHbDoWzDrWQgw5pURjg=?=",
                "Thís ís válíd ÚTF8",
                false,
            ),
            (
                "?utf-8*unknown?q?Th=C3=ADs_=C3=ADs_v=C3=A1l=C3=ADd_=C3=9ATF8?=",
                "Thís ís válíd ÚTF8",
                false,
            ),
            (
                "?Iso-8859-6?Q?=E5=D1=CD=C8=C7 =C8=C7=E4=D9=C7=E4=E5?=",
                "مرحبا بالعالم",
                true,
            ),
            (
                "?Iso-8859-6*arabic?b?5dHNyMcgyMfk2cfk5Q==?=",
                "مرحبا بالعالم",
                true,
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?shift_jis?B?g26DjYFbgUWDj4Fbg4uDaA==?=",
                "ハロー・ワールド",
                true,
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?iso-2022-jp?q?=1B$B%O%m!<!&%o!<%k%I=1B(B?=",
                "ハロー・ワールド",
                true,
            ),
        ];

        for (input, expected_result, _) in cases {
            let decoded = MessageStream::new(input.as_bytes())
                .decode_rfc2047()
                .unwrap_or_else(|| panic!("Failed to decode '{}'", input));
            assert_eq!(decoded, expected_result);
        }
    }
}