mail_parser/decoders/
encoded_word.rs1use super::DecodeWordFnc;
8use crate::{decoders::charsets::map::charset_decoder, parsers::MessageStream};
9
/// Position of the `decode_rfc2047` state machine within the header of an
/// encoded word (the `?charset?encoding?` prefix that precedes the payload).
enum Rfc2047State {
    /// Nothing consumed yet; the next byte must be the opening `?`.
    Init,
    /// Scanning the charset name, which is terminated by the next `?`.
    Charset,
    /// Expecting the single encoding letter (`q`/`Q` or `b`/`B`).
    Encoding,
    /// Expecting the `?` that separates the encoding letter from the payload.
    Data,
}
16
17impl MessageStream<'_> {
18 pub fn decode_rfc2047(&mut self) -> Option<String> {
19 let mut state = Rfc2047State::Init;
20
21 let mut charset_start = 0;
22 let mut charset_end = 0;
23 let mut decode_fnc: Option<DecodeWordFnc<'_>> = None;
24
25 while let Some(ch) = self.next() {
26 match state {
27 Rfc2047State::Init => {
28 if ch != &b'?' {
29 return None;
30 }
31 state = Rfc2047State::Charset;
32 charset_start = self.offset();
33 charset_end = self.offset();
34 }
35 Rfc2047State::Charset => match ch {
36 b'?' => {
37 if charset_end == charset_start {
38 charset_end = self.offset() - 1;
39 }
40 if (charset_end - charset_start) < 2 {
41 return None;
42 }
43 state = Rfc2047State::Encoding;
44 }
45 b'*' if charset_end == charset_start => {
46 charset_end = self.offset() - 1;
47 }
48 b'\n' => {
49 return None;
50 }
51 _ => (),
52 },
53 Rfc2047State::Encoding => {
54 match ch {
55 b'q' | b'Q' => {
56 decode_fnc = Some(MessageStream::decode_quoted_printable_word)
57 }
58 b'b' | b'B' => decode_fnc = Some(MessageStream::decode_base64_word),
59 _ => {
60 return None;
61 }
62 }
63 state = Rfc2047State::Data;
64 }
65 Rfc2047State::Data => {
66 if ch != &b'?' {
67 return None;
68 } else {
69 break;
70 }
71 }
72 }
73 }
74
75 if let Some(bytes) = decode_fnc.and_then(|fnc| fnc(self)) {
76 if let Some(decoder) = charset_decoder(self.bytes(charset_start..charset_end)) {
77 decoder(&bytes).into()
78 } else {
79 String::from_utf8(bytes)
80 .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
81 .into()
82 }
83 } else {
84 None
85 }
86 }
87}
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Well-formed encoded words must decode to the expected text.
    ///
    /// Each fixture is `(input, expected)`; the input starts at the `?`
    /// that opens the charset section.
    #[test]
    fn decode_rfc2047() {
        for (input, expected_result) in [
            (
                "?iso-8859-1?q?this=20is=20some=20text?=",
                "this is some text",
            ),
            ("?iso-8859-1?q?this is some text?=", "this is some text"),
            ("?US-ASCII?Q?Keith_Moore?=", "Keith Moore"),
            (
                "?iso_8859-1:1987?Q?Keld_J=F8rn_Simonsen?=",
                "Keld Jørn Simonsen",
            ),
            (
                "?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
                "If you can read this yo",
            ),
            (
                "?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
                "u understand the example.",
            ),
            ("?ISO-8859-1?Q?Olle_J=E4rnefors?=", "Olle Järnefors"),
            ("?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=", "Patrik Fältström"),
            // A `*` closes the charset name; extra `*` or a trailing tag is ignored.
            ("?ISO-8859-1*?Q?a?=", "a"),
            ("?ISO-8859-1**?Q?a_b?=", "a b"),
            (
                "?utf-8?b?VGjDrXMgw61zIHbDoWzDrWQgw5pURjg=?=",
                "Thís ís válíd ÚTF8",
            ),
            (
                "?utf-8*unknown?q?Th=C3=ADs_=C3=ADs_v=C3=A1l=C3=ADd_=C3=9ATF8?=",
                "Thís ís válíd ÚTF8",
            ),
            (
                "?Iso-8859-6?Q?=E5=D1=CD=C8=C7 =C8=C7=E4=D9=C7=E4=E5?=",
                "مرحبا بالعالم",
            ),
            (
                "?Iso-8859-6*arabic?b?5dHNyMcgyMfk2cfk5Q==?=",
                "مرحبا بالعالم",
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?shift_jis?B?g26DjYFbgUWDj4Fbg4uDaA==?=",
                "ハロー・ワールド",
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?iso-2022-jp?q?=1B$B%O%m!<!&%o!<%k%I=1B(B?=",
                "ハロー・ワールド",
            ),
        ] {
            match MessageStream::new(input.as_bytes()).decode_rfc2047() {
                Some(result) => assert_eq!(result, expected_result),
                None => panic!("Failed to decode '{}'", input),
            }
        }
    }

    /// Malformed headers must be rejected before any payload is decoded.
    #[test]
    fn decode_rfc2047_rejects_malformed_headers() {
        for input in [
            // Nothing to read at all.
            "",
            // Missing the opening `?`.
            "iso-8859-1?q?text?=",
            // Charset name shorter than two bytes.
            "?x?q?text?=",
            // Line feed inside the charset name.
            "?iso-8859-1\n?q?text?=",
            // Encoding letter is neither `q` nor `b`.
            "?iso-8859-1?x?text?=",
            // Encoding letter not followed by `?`.
            "?iso-8859-1?qtext?=",
            // Input ends before an encoding is selected.
            "?iso-8859-1",
        ] {
            assert!(
                MessageStream::new(input.as_bytes())
                    .decode_rfc2047()
                    .is_none(),
                "Unexpectedly decoded '{}'",
                input
            );
        }
    }
}