// rfc2047_decoder/parser.rs

1use charset::Charset;
2use std::{convert::TryFrom, result};
3
4use crate::lexer::{encoded_word, Token, Tokens};
5
6/// All errors which the parser can throw.
7#[derive(thiserror::Error, Debug, Clone, PartialEq)]
8pub enum Error {
9    #[error("cannot parse encoding: encoding is bigger than a char")]
10    ParseEncodingTooBigError,
11    #[error("cannot parse encoding: encoding is empty")]
12    ParseEncodingEmptyError,
13    #[error("cannot parse encoding {0}: B or Q is expected")]
14    ParseEncodingError(char),
15}
16
17type Result<T> = result::Result<T, Error>;
18
19pub type ClearText = Vec<u8>;
20pub type ParsedEncodedWords = Vec<ParsedEncodedWord>;
21
/// Encoding named by the encoding token of an encoded word.
///
/// The enum carries no data, so it is `Copy` and can be passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// Selected by the token `B` or `b` (the "B" encoding of RFC 2047).
    B,
    /// Selected by the token `Q` or `q` (the "Q" encoding of RFC 2047).
    Q,
}
27
28impl Encoding {
29    pub const B_CHAR: char = 'b';
30    pub const Q_CHAR: char = 'q';
31    pub const MAX_LENGTH: usize = 1;
32}
33
34impl TryFrom<Vec<u8>> for Encoding {
35    type Error = Error;
36
37    fn try_from(token: Vec<u8>) -> Result<Self> {
38        if token.len() > Self::MAX_LENGTH {
39            return Err(Error::ParseEncodingTooBigError);
40        }
41
42        let encoding = token.first().ok_or(Error::ParseEncodingEmptyError)?;
43        let encoding = *encoding as char;
44
45        match encoding.to_ascii_lowercase() {
46            Encoding::Q_CHAR => Ok(Self::Q),
47            Encoding::B_CHAR => Ok(Self::B),
48            _ => Err(Error::ParseEncodingError(encoding)),
49        }
50    }
51}
52
53#[derive(Debug, Clone, PartialEq, Hash)]
54pub enum ParsedEncodedWord {
55    ClearText(ClearText),
56    EncodedWord {
57        charset: Option<Charset>,
58        encoding: Encoding,
59        encoded_text: Vec<u8>,
60    },
61}
62
63impl ParsedEncodedWord {
64    pub fn convert_encoded_word(encoded_word: encoded_word::EncodedWord) -> Result<Self> {
65        let encoding = Encoding::try_from(encoded_word.encoding)?;
66        let charset = Charset::for_label(&encoded_word.charset);
67
68        Ok(Self::EncodedWord {
69            charset,
70            encoding,
71            encoded_text: encoded_word.encoded_text,
72        })
73    }
74}
75
76pub fn run(tokens: Tokens) -> Result<ParsedEncodedWords> {
77    let parsed_encoded_words = convert_tokens_to_encoded_words(tokens)?;
78    Ok(parsed_encoded_words)
79}
80
81fn convert_tokens_to_encoded_words(tokens: Tokens) -> Result<ParsedEncodedWords> {
82    tokens
83        .into_iter()
84        .map(|token: Token| match token {
85            Token::ClearText(clear_text) => Ok(ParsedEncodedWord::ClearText(clear_text)),
86            Token::EncodedWord(encoded_word) => {
87                ParsedEncodedWord::convert_encoded_word(encoded_word)
88            }
89        })
90        .collect()
91}
92
/// Parser tests driven by the examples of RFC 2047, section 8: each header
/// value is lexed, parsed, and compared against the expected encoded word.
#[cfg(test)]
mod tests {
    use charset::Charset;

    use crate::{
        lexer,
        parser::{self, Encoding, ParsedEncodedWord},
        Decoder,
    };

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `From` field
    #[test]
    fn test_parse1() {
        let message = "=?US-ASCII?Q?Keith_Moore?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("US-ASCII".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keith_Moore".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `To` field
    #[test]
    fn test_parse2() {
        let message = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keld_J=F8rn_Simonsen".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `CC` field
    #[test]
    fn test_parse3() {
        let message = "=?ISO-8859-1?Q?Andr=E9?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Andr=E9".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Example taken from:
    /// https://datatracker.ietf.org/doc/html/rfc2047#section-8
    ///
    /// `Subject` field
    #[test]
    fn test_parse4() {
        let message = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::B,
            encoded_text: "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }
}