1use encoding_rs::Encoding;
2
3use base64::{engine::general_purpose, Engine as _};
4use nom::{
5    branch::alt,
6    bytes::complete::{tag, take, take_while, take_while1},
7    character::complete::one_of,
8    character::is_alphanumeric,
9    combinator::{map, opt},
10    multi::{many0, many1},
11    sequence::{preceded, terminated, tuple},
12    IResult,
13};
14
15use crate::text::ascii;
16use crate::text::whitespace::cfws;
17use crate::text::words;
18
19pub fn encoded_word(input: &[u8]) -> IResult<&[u8], EncodedWord> {
20    alt((encoded_word_quoted, encoded_word_base64))(input)
21}
22
23pub fn encoded_word_quoted(input: &[u8]) -> IResult<&[u8], EncodedWord> {
24    let (rest, (_, _, charset, _, _, _, txt, _, _)) = tuple((
25        opt(cfws),
26        tag("=?"),
27        words::mime_atom,
28        tag("?"),
29        one_of("Qq"),
30        tag("?"),
31        ptext,
32        tag("?="),
33        opt(cfws),
34    ))(input)?;
35
36    let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
37    let parsed = EncodedWord::Quoted(QuotedWord {
38        enc: renc,
39        chunks: txt,
40    });
41    Ok((rest, parsed))
42}
43
44pub fn encoded_word_base64(input: &[u8]) -> IResult<&[u8], EncodedWord> {
45    let (rest, (_, charset, _, _, _, txt, _)) = tuple((
46        tag("=?"),
47        words::mime_atom,
48        tag("?"),
49        one_of("Bb"),
50        tag("?"),
51        btext,
52        tag("?="),
53    ))(input)?;
54
55    let renc = Encoding::for_label(charset).unwrap_or(encoding_rs::WINDOWS_1252);
56    let parsed = EncodedWord::Base64(Base64Word {
57        enc: renc,
58        content: txt,
59    });
60    Ok((rest, parsed))
61}
62
63#[derive(PartialEq, Debug, Clone)]
64pub enum EncodedWord<'a> {
65    Quoted(QuotedWord<'a>),
66    Base64(Base64Word<'a>),
67}
68impl<'a> EncodedWord<'a> {
69    pub fn to_string(&self) -> String {
70        match self {
71            EncodedWord::Quoted(v) => v.to_string(),
72            EncodedWord::Base64(v) => v.to_string(),
73        }
74    }
75}
76
77#[derive(PartialEq, Debug, Clone)]
78pub struct Base64Word<'a> {
79    pub enc: &'static Encoding,
80    pub content: &'a [u8],
81}
82
83impl<'a> Base64Word<'a> {
84    pub fn to_string(&self) -> String {
85        general_purpose::STANDARD_NO_PAD
86            .decode(self.content)
87            .map(|d| self.enc.decode(d.as_slice()).0.to_string())
88            .unwrap_or("".into())
89    }
90}
91
92#[derive(PartialEq, Debug, Clone)]
93pub struct QuotedWord<'a> {
94    pub enc: &'static Encoding,
95    pub chunks: Vec<QuotedChunk<'a>>,
96}
97
98impl<'a> QuotedWord<'a> {
99    pub fn to_string(&self) -> String {
100        self.chunks.iter().fold(String::new(), |mut acc, c| {
101            match c {
102                QuotedChunk::Safe(v) => {
103                    let (content, _) = encoding_rs::UTF_8.decode_without_bom_handling(v);
104                    acc.push_str(content.as_ref());
105                }
106                QuotedChunk::Space => acc.push(' '),
107                QuotedChunk::Encoded(v) => {
108                    let (d, _) = self.enc.decode_without_bom_handling(v.as_slice());
109                    acc.push_str(d.as_ref());
110                }
111            };
112            acc
113        })
114    }
115}
116
/// One piece of a Q-encoded payload, as produced by `ptext`.
#[derive(Debug, Clone, PartialEq)]
pub enum QuotedChunk<'a> {
    /// A run of literal bytes borrowed from the input.
    Safe(&'a [u8]),
    /// One or more consecutive `=XX` escapes, already turned into bytes.
    Encoded(Vec<u8>),
    /// An underscore, which stands for a space.
    Space,
}
123
124pub fn ptext(input: &[u8]) -> IResult<&[u8], Vec<QuotedChunk>> {
126    many0(alt((safe_char2, encoded_space, many_hex_octet)))(input)
127}
128
129fn safe_char2(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
130    map(take_while1(is_safe_char2), QuotedChunk::Safe)(input)
131}
132
133fn is_safe_char2(c: u8) -> bool {
138    c >= ascii::SP && c != ascii::UNDERSCORE && c != ascii::QUESTION && c != ascii::EQ
139}
140
141fn encoded_space(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
142    map(tag("_"), |_| QuotedChunk::Space)(input)
143}
144
145fn hex_octet(input: &[u8]) -> IResult<&[u8], u8> {
146    use nom::error::*;
147
148    let (rest, hbytes) = preceded(tag("="), take(2usize))(input)?;
149
150    let hstr = String::from_utf8_lossy(hbytes);
151    let parsed = u8::from_str_radix(hstr.as_ref(), 16)
152        .map_err(|_| nom::Err::Error(Error::new(input, ErrorKind::Verify)))?;
153
154    Ok((rest, parsed))
155}
156
157fn many_hex_octet(input: &[u8]) -> IResult<&[u8], QuotedChunk> {
158    map(many1(hex_octet), QuotedChunk::Encoded)(input)
159}
160
161pub fn btext(input: &[u8]) -> IResult<&[u8], &[u8]> {
163    terminated(take_while(is_bchar), many0(tag("=")))(input)
164}
165
166fn is_bchar(c: u8) -> bool {
167    is_alphanumeric(c) || c == ascii::PLUS || c == ascii::SLASH
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as an encoded word and returns its decoded text,
    /// panicking if parsing fails.
    fn decode(raw: &[u8]) -> String {
        encoded_word(raw).unwrap().1.to_string()
    }

    // The Q payload is split into literal runs, spaces and escaped runs.
    #[test]
    fn test_ptext() {
        let expected = vec![
            QuotedChunk::Safe(&b"Accus"[..]),
            QuotedChunk::Encoded(vec![0xe9]),
            QuotedChunk::Space,
            QuotedChunk::Safe(&b"de"[..]),
            QuotedChunk::Space,
            QuotedChunk::Safe(&b"r"[..]),
            QuotedChunk::Encoded(vec![0xe9]),
            QuotedChunk::Safe(&b"ception"[..]),
            QuotedChunk::Space,
            QuotedChunk::Safe(&b"(affich"[..]),
            QuotedChunk::Encoded(vec![0xe9]),
            QuotedChunk::Safe(&b")"[..]),
        ];

        assert_eq!(
            ptext(b"Accus=E9_de_r=E9ception_(affich=E9)"),
            Ok((&b""[..], expected))
        );
    }

    // Q-encoded word in a single-byte charset.
    #[test]
    fn test_decode_word() {
        let decoded = decode(b"=?iso8859-1?Q?Accus=E9_de_r=E9ception_(affich=E9)?=");
        assert_eq!(decoded, "Accusé de réception (affiché)".to_string());
    }

    // B-encoded word whose payload ends with `=` padding.
    #[test]
    fn test_decode_word_b64() {
        let decoded = decode(b"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=");
        assert_eq!(decoded, "If you can read this yo".to_string());
    }

    // Q-encoded word where one character spans two consecutive escapes.
    #[test]
    fn test_strange_quoted() {
        let decoded = decode(b"=?UTF-8?Q?John_Sm=C3=AEth?=");
        assert_eq!(decoded, "John Smîth".to_string());
    }
}