1use encoding_rs::Encoding;
2
3use base64::{engine::general_purpose, Engine as _};
4use nom::{
5 branch::alt,
6 bytes::complete::{tag, take, take_while, take_while1},
7 character::complete::one_of,
8 character::is_alphanumeric,
9 combinator::{map, opt},
10 multi::{many0, many1},
11 sequence::{preceded, terminated, tuple},
12 IResult,
13};
14
15use crate::text::ascii;
16use crate::text::whitespace::cfws;
17use crate::text::words;
18
19pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
20 alt((encoded_word_quoted, encoded_word_base64))(input)
21}
22
23pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
24 let (rest, (_, _, charset, _, _, _, txt, _, _)) = tuple((
25 opt(cfws),
26 tag("=?"),
27 words::mime_atom,
28 tag("?"),
29 one_of("Qq"),
30 tag("?"),
31 ptext,
32 tag("?="),
33 opt(cfws),
34 ))(input)?;
35
36 let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
37 let parsed = EncodedWord::Quoted(QuotedWord {
38 enc: renc,
39 chunks: txt,
40 });
41 Ok((rest, parsed))
42}
43
44pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
45 let (rest, (_, charset, _, _, _, txt, _)) = tuple((
46 tag("=?"),
47 words::mime_atom,
48 tag("?"),
49 one_of("Bb"),
50 tag("?"),
51 btext,
52 tag("?="),
53 ))(input)?;
54
55 let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
56 let parsed = EncodedWord::Base64(Base64Word {
57 enc: renc,
58 content: txt,
59 });
60 Ok((rest, parsed))
61}
62
63#[derive(PartialEq, Debug, Clone)]
64pub enum EncodedWord<'a> {
65 Quoted(QuotedWord<'a>),
66 Base64(Base64Word<'a>),
67}
68impl<'a> EncodedWord<'a> {
69 pub fn to_string(&self) -> String {
70 match self {
71 EncodedWord::Quoted(v) => v.to_string(),
72 EncodedWord::Base64(v) => v.to_string(),
73 }
74 }
75}
76
77#[derive(PartialEq, Debug, Clone)]
78pub struct Base64Word<'a> {
79 pub enc: &'static Encoding,
80 pub content: &'a [u8],
81}
82
83impl<'a> Base64Word<'a> {
84 pub fn to_string(&self) -> String {
85 general_purpose::STANDARD_NO_PAD
86 .decode(self.content)
87 .map(|d| self.enc.decode(d.as_slice()).0.to_string())
88 .unwrap_or("".into())
89 }
90}
91
92#[derive(PartialEq, Debug, Clone)]
93pub struct QuotedWord<'a> {
94 pub enc: &'static Encoding,
95 pub chunks: Vec<QuotedChunk<'a>>,
96}
97
98impl<'a> QuotedWord<'a> {
99 pub fn to_string(&self) -> String {
100 self.chunks.iter().fold(String::new(), |mut acc, c| {
101 match c {
102 QuotedChunk::Safe(v) => {
103 let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
104 acc.push_str(content.as_ref());
105 }
106 QuotedChunk::Space => acc.push(' '),
107 QuotedChunk::Encoded(v) => {
108 let (d, _) = self.enc.decode_without_bom_handling(v.as_slice());
109 acc.push_str(d.as_ref());
110 }
111 };
112 acc
113 })
114 }
115}
116
117#[derive(PartialEq, Debug, Clone)]
118pub enum QuotedChunk<'a> {
119 Safe(&'a [u8]),
120 Encoded(Vec<u8>),
121 Space,
122}
123
124pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
126 many0(alt((safe_char2, encoded_space, many_hex_octet)))(input)
127}
128
129fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
130 map(take_while1(is_safe_char2), QuotedChunk::Safe)(input)
131}
132
133fn is_safe_char2(c: u8) -> bool {
138 c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
139}
140
141fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
142 map(tag("_"), |_| QuotedChunk::Space)(input)
143}
144
145fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> {
146 use nom::error::*;
147
148 let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
149
150 let hstr = String::from_utf8_lossy(hbytes);
151 let parsed = u8::from_str_radix(hstr.as_ref(), 16)
152 .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
153
154 Ok((rest, parsed))
155}
156
157fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
158 map(many1(hex_octet), QuotedChunk::Encoded)(input)
159}
160
161pub fn btext(input: &[u8]) -> IResult<&[u8], &[u8]> {
163 terminated(take_while(is_bchar), many0(tag("=")))(input)
164}
165
166fn is_bchar(c: u8) -> bool {
167 is_alphanumeric(c) || c == ascii::PLUS || c == ascii::SLASH
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as an encoded-word and returns its decoded text.
    fn decode(raw: &[u8]) -> String {
        let (_rest, word) = encoded_word(raw).unwrap();
        word.to_string()
    }

    #[test]
    fn test_ptext() {
        use QuotedChunk::{Encoded, Safe, Space};

        let expected = vec![
            Safe(&b"Accus"[..]),
            Encoded(vec![0xe9]),
            Space,
            Safe(&b"de"[..]),
            Space,
            Safe(&b"r"[..]),
            Encoded(vec![0xe9]),
            Safe(&b"ception"[..]),
            Space,
            Safe(&b"(affich"[..]),
            Encoded(vec![0xe9]),
            Safe(&b")"[..]),
        ];

        assert_eq!(
            ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"),
            Ok((&b""[..], expected))
        );
    }

    #[test]
    fn test_decode_word() {
        let decoded = decode(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=");
        assert_eq!(decoded, "Accusé de réception (affiché)");
    }

    #[test]
    fn test_decode_word_b64() {
        let decoded = decode(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=");
        assert_eq!(decoded, "If you can read this yo");
    }

    #[test]
    fn test_strange_quoted() {
        let decoded = decode(b"=?UTF-8?Q?John_Sm=C3=AEth?=");
        assert_eq!(decoded, "John Smîth");
    }
}