// rfc2047_decoder/parser.rs
use charset::Charset;
use std::{convert::TryFrom, result};

use crate::lexer::{encoded_word, Token, Tokens};
5
6#[derive(thiserror::Error, Debug, Clone, PartialEq)]
8pub enum Error {
9 #[error("cannot parse encoding: encoding is bigger than a char")]
10 ParseEncodingTooBigError,
11 #[error("cannot parse encoding: encoding is empty")]
12 ParseEncodingEmptyError,
13 #[error("cannot parse encoding {0}: B or Q is expected")]
14 ParseEncodingError(char),
15}
16
17type Result<T> = result::Result<T, Error>;
18
19pub type ClearText = Vec<u8>;
20pub type ParsedEncodedWords = Vec<ParsedEncodedWord>;
21
/// Content transfer encoding declared in the encoding field of an encoded
/// word (see RFC 2047, section 4).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Encoding {
    /// The "B" encoding.
    B,
    /// The "Q" encoding.
    Q,
}
27
28impl Encoding {
29 pub const B_CHAR: char = 'b';
30 pub const Q_CHAR: char = 'q';
31 pub const MAX_LENGTH: usize = 1;
32}
33
34impl TryFrom<Vec<u8>> for Encoding {
35 type Error = Error;
36
37 fn try_from(token: Vec<u8>) -> Result<Self> {
38 if token.len() > Self::MAX_LENGTH {
39 return Err(Error::ParseEncodingTooBigError);
40 }
41
42 let encoding = token.first().ok_or(Error::ParseEncodingEmptyError)?;
43 let encoding = *encoding as char;
44
45 match encoding.to_ascii_lowercase() {
46 Encoding::Q_CHAR => Ok(Self::Q),
47 Encoding::B_CHAR => Ok(Self::B),
48 _ => Err(Error::ParseEncodingError(encoding)),
49 }
50 }
51}
52
53#[derive(Debug, Clone, PartialEq, Hash)]
54pub enum ParsedEncodedWord {
55 ClearText(ClearText),
56 EncodedWord {
57 charset: Option<Charset>,
58 encoding: Encoding,
59 encoded_text: Vec<u8>,
60 },
61}
62
63impl ParsedEncodedWord {
64 pub fn convert_encoded_word(encoded_word: encoded_word::EncodedWord) -> Result<Self> {
65 let encoding = Encoding::try_from(encoded_word.encoding)?;
66 let charset = Charset::for_label(&encoded_word.charset);
67
68 Ok(Self::EncodedWord {
69 charset,
70 encoding,
71 encoded_text: encoded_word.encoded_text,
72 })
73 }
74}
75
76pub fn run(tokens: Tokens) -> Result<ParsedEncodedWords> {
77 let parsed_encoded_words = convert_tokens_to_encoded_words(tokens)?;
78 Ok(parsed_encoded_words)
79}
80
81fn convert_tokens_to_encoded_words(tokens: Tokens) -> Result<ParsedEncodedWords> {
82 tokens
83 .into_iter()
84 .map(|token: Token| match token {
85 Token::ClearText(clear_text) => Ok(ParsedEncodedWord::ClearText(clear_text)),
86 Token::EncodedWord(encoded_word) => {
87 ParsedEncodedWord::convert_encoded_word(encoded_word)
88 }
89 })
90 .collect()
91}
92
#[cfg(test)]
mod tests {
    use charset::Charset;

    use crate::{
        lexer,
        parser::{self, Encoding, ParsedEncodedWord},
        Decoder,
    };

    /// Q-encoded US-ASCII word (example from RFC 2047, section 8).
    #[test]
    fn test_parse1() {
        let message = "=?US-ASCII?Q?Keith_Moore?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("US-ASCII".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keith_Moore".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Q-encoded ISO-8859-1 word containing an escaped byte (example from
    /// RFC 2047, section 8). The payload must stay encoded after parsing.
    #[test]
    fn test_parse2() {
        let message = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Keld_J=F8rn_Simonsen".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// Q-encoded ISO-8859-1 word ending with an escaped byte (example from
    /// RFC 2047, section 8).
    #[test]
    fn test_parse3() {
        let message = "=?ISO-8859-1?Q?Andr=E9?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::Q,
            encoded_text: "Andr=E9".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }

    /// B-encoded ISO-8859-1 word (example from RFC 2047, section 8).
    #[test]
    fn test_parse4() {
        let message = "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=".as_bytes();
        let tokens = lexer::run(&message, Decoder::new()).unwrap();
        let parsed = parser::run(tokens).unwrap();

        let expected = vec![ParsedEncodedWord::EncodedWord {
            charset: Charset::for_label("ISO-8859-1".as_bytes()),
            encoding: Encoding::B,
            encoded_text: "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=".as_bytes().to_vec(),
        }];

        assert_eq!(parsed, expected);
    }
}