mail_internals/grammar.rs
//! This module contains a number of helper functions for writing parsers.
//!
//! Ironically they are also needed when writing mail encoders/generators,
//! e.g. for checking if a part needs special encoding.
5use ::MailType;
6
/// ftext as defined by RFC 5322
///
/// This is any printable US-ASCII character except `:`, i.e.
/// `'!'..='9'` (0x21-0x39) or `';'..='~'` (0x3B-0x7E).
#[inline(always)]
pub fn is_ftext(ch: char) -> bool {
    match ch {
        // everything printable below and above ':' (0x3A)
        '!'..='9' | ';'..='~' => true,
        _ => false,
    }
}
18
/// WSP as defined by RFC 5234 (`WSP = SP / HTAB`)
///
/// Only the ascii space and the horizontal tab count, this is intentionally
/// narrower than `char::is_whitespace`.
#[inline(always)]
pub fn is_ws(ch: char) -> bool {
    match ch {
        ' ' | '\t' => true,
        _ => false,
    }
}
27
/// True if `ch` is the ascii space character (`' '`, 0x20).
#[inline(always)]
pub fn is_space(ch: char) -> bool {
    ch as u32 == 0x20
}
33
/// True if `ch` is us-ascii, i.e. its code point is below 128.
#[inline(always)]
pub fn is_ascii(ch: char) -> bool {
    ch <= '\u{7f}'
}
39
/// True if `ch` is ascii and "visible"/"printable".
///
/// Those are exactly the chars `'!'..='~'`, i.e. the (decimal)
/// code points 33..=126. Space and DEL are not included.
#[inline(always)]
pub fn is_ascii_vchar(ch: char) -> bool {
    match ch {
        '!'..='~' => true,
        _ => false,
    }
}
49
50/// VCHAR as defined by RFC 5243
51///
52/// Is true if it's either an us-ascii vchar or
53/// an non us-ascii char and the mail type is
54/// internationalized.
55///
56/// This mean that this includes _non printable_
57/// characters as long as the mail is internationalized
58/// and the character is non us-ascii utf-8.
59#[inline(always)]
60pub fn is_vchar(ch: char, mt: MailType) -> bool {
61 is_ascii_vchar(ch) || (mt == MailType::Internationalized && !is_ascii(ch))
62}
63
64
65//TODO as RFCs
66/// can be quoted in a quoted string (internalized) based on RFC ... and RFC ...
67#[inline(always)]
68pub fn is_quotable(ch: char, tp: MailType) -> bool {
69 is_vchar(ch, tp) || is_ws(ch)
70}
71
/// True for any kind of whitespace, not just ascii whitespace.
///
/// This simply forwards to `char::is_whitespace`.
#[inline(always)]
pub fn is_any_whitespace(ch: char) -> bool {
    char::is_whitespace(ch)
}
77
78/// ctext as defined by RFC 5322
79pub fn is_ctext(ch: char, mt: MailType) -> bool {
80 match ch {
81 '!'...'\'' |
82 '*'...'[' |
83 ']'...'~' => true,
84 // obs-ctext
85 _ => mt == MailType::Internationalized && !is_ascii( ch )
86 }
87}
88
/// Check if a char is a special (_based on RFC 5322_).
///
/// The specials are: `(` `)` `<` `>` `[` `]` `:` `;` `@` `\` `,` `.` `"`
///
/// Note that there is also an _especial_ from a different RFC
/// (see `is_especial`) which is a different set of chars.
pub fn is_special(ch: char) -> bool {
    const SPECIALS: &str = "()<>[]:;@\\,.\"";
    SPECIALS.contains(ch)
}
104
105
/// Check if a char is a tspecial (based on RFC 2045).
///
/// The tspecials are:
/// `(` `)` `<` `>` `@` `,` `;` `:` `\` `"` `/` `[` `]` `?` `=`
pub fn is_tspecial(ch: char) -> bool {
    const TSPECIALS: &str = "()<>@,;:\\\"/[]?=";
    TSPECIALS.contains(ch)
}
120
121
122
123/// atext as defined by RFC 5322
124#[inline(always)]
125pub fn is_atext(ch: char, tp: MailType) -> bool {
126 is_vchar(ch, tp) && !is_special(ch)
127}
128
129/// dtext as defined by RFC 5322
130#[inline(always)]
131pub fn is_dtext(ch: char , mt: MailType) -> bool {
132 match ch as u32 {
133 33...90 |
134 94...126 => true,
135 _ => mt == MailType::Internationalized && !is_ascii(ch)
136 }
137}
138
139/// qtext as defined by RFC 5322
140pub fn is_qtext(ch: char, mt: MailType) -> bool {
141 match ch {
142 //not ' ' [d:32]
143 '!' |
144 //not '"' [d:34]
145 '#'...'[' |
146 //not '\\' [d:92]
147 ']'...'~' => true,
148 _ => mt == MailType::Internationalized && !is_ascii(ch)
149 }
150}
151
/// Check if it is a CTL char (based on RFC 822).
///
/// CTL is any ascii control character (code points 0..=31) and DEL
/// (code point 127).
///
/// # Note
/// the standard specifies `'\t'` as a CTL but not `' '`
/// but both `'\t'` and `' '` are LWSP-char i.e. semantically
/// space i.e. _semantically equivalent_.
#[inline(always)]
pub fn is_ctl(ch: char) -> bool {
    let code_point = ch as u32;
    // DEL is separated from the other control chars by the printable range
    code_point < 32 || code_point == 127
}
162
163/// Check if a char is an token char (based on RFC 2045).
164#[inline(always)]
165pub fn is_token_char(ch: char) -> bool {
166 is_ascii(ch) && !is_ctl(ch) && !is_tspecial(ch) && ch != ' '
167}
168
169
/// Check if a char is an especial (based on RFC 2047).
///
/// The chars treated as especials are:
/// `(` `)` `<` `>` `@` `,` `;` `:` `"` `/` `[` `]` `?` `.` `=`
///
/// NOTE(review): unlike `is_tspecial` this set does not contain `\`,
/// confirm this against the `especials` rule of the RFC.
#[inline(always)]
pub fn is_especial(ch: char) -> bool {
    const ESPECIALS: &[char] = &[
        '(', ')', '<', '>', '@', ',', ';', ':',
        '"', '/', '[', ']', '?', '.', '=',
    ];
    ESPECIALS.iter().any(|&especial| especial == ch)
}
186
187//TODO add rfc
188/// Check if a string is an token (based on RFC ...).
189pub fn is_token(s: &str) -> bool {
190 0 < s.len() && s.chars().all(is_token_char)
191}
192
193//
194//pub fn is_dot_atom_text( text: &str, mt: MailType ) -> bool {
195// use nom::IResult;
196// use self::parse::recognize_dot_atom_text;
197//
198// let res = tuple!( text,
199// call!( recognize_dot_atom_text, mt ),
200// eof!()
201// );
202//
203// match res {
204// IResult::Done(_, _) => true,
205// _ => false
206// }
207//}
208
209//pub mod parse {
210// use nom::IResult;
211// use super::{ is_atext, MailType };
212//
213// pub fn recognize_dot_atom_text( input: &str, mt: MailType ) -> IResult<&str, &str> {
214// recognize!( input, tuple!(
215// take_while1!( call!( is_atext, mt ) ),
216// many0!( tuple!(
217// char!( "." ),
218// take_while1!( call!( is_atext, mt ) )
219// ) )
220// ) )
221// }
222//
223//}
224//TODO this should be some where else I think
225// (but it is used by `1. codec`, `2. components` )
226/// Grammar parts for encoded words (based on RFC 2047).
227pub mod encoded_word {
228 use nom;
229 use ::MailType;
230 use ::error::{EncodingError, EncodingErrorKind};
231 use super::{ is_especial, is_ascii_vchar };
232
233 /// maximal length of an encoded word
234 pub const MAX_ECW_LEN: usize = 75;
235
236 /// The syntax overhead from "framing" an encoded word.
237 ///
238 /// This is the start (1x`=?`) the first and second separator (2x`?`) and the
239 /// end (1x`?=`) leading to 6 byte overhead.
240 pub const ECW_SEP_OVERHEAD: usize = 6;
241
242 /// Represents the place at which the encoded word appears.
243 ///
244 /// Depending on the place more or less character have to be
245 /// encoded.
246 ///
247 /// Note: Implementations creating encoded words might use a
248 /// stricter context which is compatible with all places to
249 /// reduce code complexity.
250 #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
251 pub enum EncodedWordContext {
252 Phrase,
253 Text,
254 Comment
255 }
256
257 impl EncodedWordContext {
258
259 /// Returns a (context dependent) validator to check if a char can be represented without encoding.
260 fn char_validator( &self ) -> fn(char) -> bool {
261 use self::EncodedWordContext::*;
262 match *self {
263 Phrase => valid_char_in_ec_in_phrase,
264 Text => is_encoded_word_char,
265 Comment => valid_char_in_ec_in_comment,
266 }
267 }
268 }
269
270
271 /// Returns true if the given word is a encoded word.
272 ///
273 /// Note that this depends on the context the word appears in and the mail type.
274 /// The reason for this is that encoded words tend to be valid text even without
275 /// decoding them. But this means if the encoded word has some syntax error (e.g.
276 /// missing closing `?=`) it is no longer an encoded word but just some text which
277 /// happen to look similar to one.
278 pub fn is_encoded_word(word: &str, ctx: EncodedWordContext, mail_type: MailType) -> bool {
279 try_parse_encoded_word_parts(word, ctx, mail_type).is_ok()
280 }
281
282 /// Tries to parse the given string as an encoded word.
283 pub fn try_parse_encoded_word_parts(
284 word: &str,
285 ctx: EncodedWordContext,
286 mail_type: MailType
287 ) -> Result<(&str, &str, &str), EncodingError>
288 {
289 let char_validator = ctx.char_validator();
290 // Note we could get a possible speed up by making rustc generate
291 // a different function for each Context, inlining ALL char tests
292 let res = do_parse!(
293 word,
294 char!( '=' ) >>
295 char!( '?' ) >>
296 charset: take_while!( is_ew_token_char ) >>
297 char!( '?' ) >>
298 encoding: take_while!( is_ew_token_char ) >>
299 char!( '?' ) >>
300 text: take_while!( char_validator ) >>
301 char!( '?' ) >>
302 char!( '=' ) >>
303 eof!() >>
304 (charset, encoding, text)
305 );
306
307 match res {
308 nom::IResult::Done( rest, result ) => {
309 assert_eq!(rest.len(), 0, "[BUG] used nom::eof!() but rest.len() > 0");
310 Ok( result )
311 },
312 nom::IResult::Incomplete( .. ) => {
313 return Err((EncodingErrorKind::Malformed, mail_type).into());
314 }
315 nom::IResult::Error( .. ) => {
316 return Err((EncodingErrorKind::Malformed, mail_type).into());
317 }
318 }
319 }
320
321 /// True if the char can appear in an encoded word.
322 fn is_encoded_word_char(ch: char) -> bool {
323 is_ascii_vchar(ch) && ch != '?'
324 }
325
326 /// True if the char can appear in an encoded word appearing in a comment.
327 fn valid_char_in_ec_in_comment(ch: char) -> bool {
328 is_encoded_word_char(ch) && !(ch == '(' || ch == ')' || ch == '"')
329 }
330
331 /// True if the char is valid in an encode word appearing in a phrase.
332 fn valid_char_in_ec_in_phrase(ch: char) -> bool {
333 match ch {
334 '0'...'9' |
335 'a'...'z' |
336 'A'...'Z' |
337 '!' | '*' |
338 '+' | '-' |
339 '/' | '=' |
340 '_' => true,
341 _ => false
342 }
343 }
344
345 /// True if the char is a encoded word token.
346 ///
347 /// Encoded word tokens are used for the charset and
348 /// language part of an encoded word.
349 fn is_ew_token_char(ch: char) -> bool {
350 is_ascii_vchar(ch) && !is_especial(ch)
351 }
352
353}
354
355//TODO shouldn't we use `bind/quoted_string`?
356/// True if the given string is a quoted string.
357pub fn is_quoted_string(qstr: &str, tp: MailType) -> bool {
358 let mut iter = qstr.chars();
359 if let Some('"') = iter.next() {} else { return false }
360 let mut next = iter.next();
361 while let Some(ch) = next {
362 match ch {
363 '\\' => {
364 if let Some(next_char) = iter.next() {
365 if !(is_vchar(next_char, tp) || is_ws(next_char)) {
366 return false;
367 }
368 } else {
369 return false;
370 }
371 },
372 '"' => {
373 if iter.next().is_none() {
374 return true;
375 } else {
376 return false;
377 }
378 }
379 ch => {
380 if !is_qtext(ch, tp) {
381 return false
382 }
383 }
384 }
385 next = iter.next()
386 }
387
388 // The only true return if we have a '"' followed by iter.next().is_none()
389 return false;
390}
391
392
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `is_ascii_vchar` for DEL, all chars below `' '` and all of `'!'..='~'`.
    #[test]
    fn _is_ascii_vchar() {
        assert!(!is_ascii_vchar('\x7f'));
        // everything below ' ' is a control char
        for bad_char in 0u8..b' ' {
            assert!(!is_ascii_vchar(bad_char as char), "{:?} should not be a VCHAR", bad_char);
        }
        for good_char in b'!'..=b'~' {
            assert!(is_ascii_vchar(good_char as char), "{:?} should be a VCHAR", good_char as char);
        }
    }

    /// The horizontal tab is a CTL while the space is not.
    #[test]
    fn htap_is_ctl_space_is_not() {
        assert!(is_ctl('\t'));
        assert!(!is_ctl(' '));
    }

    /// A token needs at least one char.
    #[test]
    fn is_toke_empty() {
        assert!(!is_token(""));
    }
}
423