mail_parser/decoders/
encoded_word.rs1use crate::{decoders::charsets::map::charset_decoder, parsers::MessageStream};
8
9use super::DecodeWordFnc;
10
/// Position of the parser inside the `?charset?encoding?data?=` portion of an
/// RFC 2047 encoded word.
enum Rfc2047State {
    /// Nothing consumed yet; the next byte must be the opening `?`.
    Init,
    /// Reading the charset name (and, after a `*`, an optional language tag).
    Charset,
    /// Reading the single-letter encoding selector (`Q`/`q` or `B`/`b`).
    Encoding,
    /// Encoding selector consumed; the next byte must be the `?` that
    /// introduces the encoded payload.
    Data,
}
17
18impl MessageStream<'_> {
19 pub fn decode_rfc2047(&mut self) -> Option<String> {
20 let mut state = Rfc2047State::Init;
21
22 let mut charset_start = 0;
23 let mut charset_end = 0;
24 let mut decode_fnc: Option<DecodeWordFnc<'_>> = None;
25
26 while let Some(ch) = self.next() {
27 match state {
28 Rfc2047State::Init => {
29 if ch != &b'?' {
30 return None;
31 }
32 state = Rfc2047State::Charset;
33 charset_start = self.offset();
34 charset_end = self.offset();
35 }
36 Rfc2047State::Charset => match ch {
37 b'?' => {
38 if charset_end == charset_start {
39 charset_end = self.offset() - 1;
40 }
41 if (charset_end - charset_start) < 2 {
42 return None;
43 }
44 state = Rfc2047State::Encoding;
45 }
46 b'*' => {
47 if charset_end == charset_start {
48 charset_end = self.offset() - 1;
49 }
50 }
51 b'\n' => {
52 return None;
53 }
54 _ => (),
55 },
56 Rfc2047State::Encoding => {
57 match ch {
58 b'q' | b'Q' => {
59 decode_fnc = Some(MessageStream::decode_quoted_printable_word)
60 }
61 b'b' | b'B' => decode_fnc = Some(MessageStream::decode_base64_word),
62 _ => {
63 return None;
64 }
65 }
66 state = Rfc2047State::Data;
67 }
68 Rfc2047State::Data => {
69 if ch != &b'?' {
70 return None;
71 } else {
72 break;
73 }
74 }
75 }
76 }
77
78 if let Some(bytes) = decode_fnc.and_then(|fnc| fnc(self)) {
79 if let Some(decoder) = charset_decoder(self.bytes(charset_start..charset_end)) {
80 decoder(&bytes).into()
81 } else {
82 String::from_utf8(bytes)
83 .unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
84 .into()
85 }
86 } else {
87 None
88 }
89 }
90}
#[cfg(test)]
mod tests {
    use crate::parsers::MessageStream;

    /// Runs `decode_rfc2047` over a table of encoded words (given without the
    /// leading `=`) and checks each decoded string against its expected text.
    #[test]
    fn decode_rfc2047() {
        // Each case is (encoded word, expected text, flag). The flag is not
        // read by this test.
        let cases = [
            (
                "?iso-8859-1?q?this=20is=20some=20text?=",
                "this is some text",
                true,
            ),
            (
                "?iso-8859-1?q?this is some text?=",
                "this is some text",
                true,
            ),
            ("?US-ASCII?Q?Keith_Moore?=", "Keith Moore", false),
            (
                "?iso_8859-1:1987?Q?Keld_J=F8rn_Simonsen?=",
                "Keld Jørn Simonsen",
                true,
            ),
            (
                "?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
                "If you can read this yo",
                true,
            ),
            (
                "?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
                "u understand the example.",
                true,
            ),
            ("?ISO-8859-1?Q?Olle_J=E4rnefors?=", "Olle Järnefors", true),
            (
                "?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=",
                "Patrik Fältström",
                true,
            ),
            ("?ISO-8859-1*?Q?a?=", "a", true),
            ("?ISO-8859-1**?Q?a_b?=", "a b", true),
            (
                "?utf-8?b?VGjDrXMgw61zIHbDoWzDrWQgw5pURjg=?=",
                "Thís ís válíd ÚTF8",
                false,
            ),
            (
                "?utf-8*unknown?q?Th=C3=ADs_=C3=ADs_v=C3=A1l=C3=ADd_=C3=9ATF8?=",
                "Thís ís válíd ÚTF8",
                false,
            ),
            (
                "?Iso-8859-6?Q?=E5=D1=CD=C8=C7 =C8=C7=E4=D9=C7=E4=E5?=",
                "مرحبا بالعالم",
                true,
            ),
            (
                "?Iso-8859-6*arabic?b?5dHNyMcgyMfk2cfk5Q==?=",
                "مرحبا بالعالم",
                true,
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?shift_jis?B?g26DjYFbgUWDj4Fbg4uDaA==?=",
                "ハロー・ワールド",
                true,
            ),
            #[cfg(feature = "full_encoding")]
            (
                "?iso-2022-jp?q?=1B$B%O%m!<!&%o!<%k%I=1B(B?=",
                "ハロー・ワールド",
                true,
            ),
        ];

        for (input, expected_result, _) in cases {
            let decoded = MessageStream::new(input.as_bytes())
                .decode_rfc2047()
                .unwrap_or_else(|| panic!("Failed to decode '{}'", input));
            assert_eq!(decoded, expected_result);
        }
    }
}