Skip to main content

mail_parser/decoders/
encoded_word.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use super::DecodeWordFnc;
8use crate::{decoders::charsets::map::charset_decoder, parsers::MessageStream};
9
/// Position of the parser inside the `?charset?encoding?` header of an
/// RFC 2047 encoded word, as tracked by `MessageStream::decode_rfc2047`.
enum Rfc2047State {
    /// Nothing consumed yet; the next byte must be the `?` that opens the charset.
    Init,
    /// Reading the charset label (and any `*`-introduced suffix) up to the next `?`.
    Charset,
    /// Expecting the single encoding letter: `q`/`Q` or `b`/`B`.
    Encoding,
    /// Expecting the `?` that separates the encoding letter from the payload.
    Data,
}
16
17impl MessageStream<'_> {
18    pub fn decode_rfc2047(&mut self) -> Option<String> {
19        let mut state = Rfc2047State::Init;
20
21        let mut charset_start = 0;
22        let mut charset_end = 0;
23        let mut decode_fnc: Option<DecodeWordFnc<'_>> = None;
24
25        while let Some(ch) = self.next() {
26            match state {
27                Rfc2047State::Init => {
28                    if ch != &b'?' {
29                        return None;
30                    }
31                    state = Rfc2047State::Charset;
32                    charset_start = self.offset();
33                    charset_end = self.offset();
34                }
35                Rfc2047State::Charset => match ch {
36                    b'?' => {
37                        if charset_end == charset_start {
38                            charset_end = self.offset() - 1;
39                        }
40                        if (charset_end - charset_start) < 2 {
41                            return None;
42                        }
43                        state = Rfc2047State::Encoding;
44                    }
45                    b'*' if charset_end == charset_start => {
46                        charset_end = self.offset() - 1;
47                    }
48                    b'\n' => {
49                        return None;
50                    }
51                    _ => (),
52                },
53                Rfc2047State::Encoding => {
54                    match ch {
55                        b'q' | b'Q' => {
56                            decode_fnc = Some(MessageStream::decode_quoted_printable_word)
57                        }
58                        b'b' | b'B' => decode_fnc = Some(MessageStream::decode_base64_word),
59                        _ => {
60                            return None;
61                        }
62                    }
63                    state = Rfc2047State::Data;
64                }
65                Rfc2047State::Data => {
66                    if ch != &b'?' {
67                        return None;
68                    } else {
69                        break;
70                    }
71                }
72            }
73        }
74
75        if let Some(bytes) = decode_fnc.and_then(|fnc| fnc(self)) {
76            if let Some(decoder) = charset_decoder(self.bytes(charset_start..charset_end)) {
77                decoder(&bytes).into()
78            } else {
79                String::from_utf8(bytes)
80                    .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
81                    .into()
82            }
83        } else {
84            None
85        }
86    }
87}
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Well-formed encoded words (with the leading `=` already stripped) must
    /// decode to the expected text.
    #[test]
    fn decode_rfc2047() {
        for (input, expected_result) in [
            (
                "?iso-8859-1?q?this=20is=20some=20text?=",
                "this is some text",
            ),
            ("?iso-8859-1?q?this is some text?=", "this is some text"),
            ("?US-ASCII?Q?Keith_Moore?=", "Keith Moore"),
            (
                "?iso_8859-1:1987?Q?Keld_J=F8rn_Simonsen?=",
                "Keld Jørn Simonsen",
            ),
            (
                "?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
                "If you can read this yo",
            ),
            (
                "?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
                "u understand the example.",
            ),
            ("?ISO-8859-1?Q?Olle_J=E4rnefors?=", "Olle Järnefors"),
            ("?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=", "Patrik Fältström"),
            // A `*` suffix after the charset (even an empty or doubled one) is skipped.
            ("?ISO-8859-1*?Q?a?=", "a"),
            ("?ISO-8859-1**?Q?a_b?=", "a b"),
            (
                "?utf-8?b?VGjDrXMgw61zIHbDoWzDrWQgw5pURjg=?=",
                "Thís ís válíd ÚTF8",
            ),
            (
                "?utf-8*unknown?q?Th=C3=ADs_=C3=ADs_v=C3=A1l=C3=ADd_=C3=9ATF8?=",
                "Thís ís válíd ÚTF8",
            ),
            (
                "?Iso-8859-6?Q?=E5=D1=CD=C8=C7 =C8=C7=E4=D9=C7=E4=E5?=",
                "مرحبا بالعالم",
            ),
            (
                "?Iso-8859-6*arabic?b?5dHNyMcgyMfk2cfk5Q==?=",
                "مرحبا بالعالم",
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?shift_jis?B?g26DjYFbgUWDj4Fbg4uDaA==?=",
                "ハロー・ワールド",
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?iso-2022-jp?q?=1B$B%O%m!<!&%o!<%k%I=1B(B?=",
                "ハロー・ワールド",
            ),
        ] {
            assert_eq!(
                MessageStream::new(input.as_bytes())
                    .decode_rfc2047()
                    .as_deref(),
                Some(expected_result),
                "Failed to decode '{}'",
                input
            );
        }
    }

    /// Inputs whose `?charset?encoding?` header is malformed must be rejected
    /// before any payload decoding takes place.
    #[test]
    fn decode_rfc2047_rejects_malformed_header() {
        for input in [
            // Does not start with `?`.
            "iso-8859-1?q?text?=",
            // Charset label shorter than two bytes.
            "?x?q?text?=",
            // Line feed inside the charset.
            "?iso-8859\n-1?q?text?=",
            // Unknown encoding letter.
            "?iso-8859-1?x?text?=",
            // Encoding letter not followed by `?`.
            "?iso-8859-1?qtext?=",
        ] {
            assert_eq!(
                MessageStream::new(input.as_bytes()).decode_rfc2047(),
                None,
                "Unexpectedly decoded '{}'",
                input
            );
        }
    }
}