mail_internals/
grammar.rs

//! This module contains a number of helper functions for writing parsers.
//!
//! Ironically they are also needed when writing mail encoders/generators,
//! e.g. for checking if a part needs special encoding.
5use ::MailType;
6
/// ftext as defined by RFC 5322.
///
/// This is any printable US-ASCII character except `:`
///  => 0x21-0x39 / 0x3B-0x7E
///  => '!'..='9' / ';'..='~'
#[inline(always)]
pub fn is_ftext(ch: char) -> bool {
    match ch as u32 {
        // ':' separates the field name from the field body
        0x3A => false,
        code => 0x21 <= code && code <= 0x7E,
    }
}
18
/// WSP as defined by RFC 5234 (`WSP = SP / HTAB`).
///
/// This is intentionally not `char::is_whitespace`, as that
/// is not limited to ascii whitespace.
#[inline(always)]
pub fn is_ws(ch: char) -> bool {
    match ch {
        ' ' | '\t' => true,
        _ => false,
    }
}
27
/// True if `ch` is the space character `' '` (0x20) and nothing else.
#[inline(always)]
pub fn is_space(ch: char) -> bool {
    ch as u32 == 0x20
}
33
/// True if `ch` is us-ascii, i.e. its code point is in `0..=127`.
#[inline(always)]
pub fn is_ascii(ch: char) -> bool {
    ch as u32 <= 0x7F
}
39
/// True if `ch` is ascii and "visible"/"printable".
///
/// This holds for every char from `'!'` (decimal 33) up to
/// and including `'~'` (decimal 126).
#[inline(always)]
pub fn is_ascii_vchar(ch: char) -> bool {
    '!' <= ch && ch <= '~'
}
49
50/// VCHAR as defined by RFC 5243
51///
52/// Is true if it's either an us-ascii vchar or
53/// an non us-ascii char and the mail type is
54/// internationalized.
55///
56/// This mean that this includes _non printable_
57/// characters as long as the mail is internationalized
58/// and the character is non us-ascii utf-8.
59#[inline(always)]
60pub fn is_vchar(ch: char, mt: MailType) -> bool {
61    is_ascii_vchar(ch) || (mt == MailType::Internationalized && !is_ascii(ch))
62}
63
64
65//TODO as RFCs
66/// can be quoted in a quoted string (internalized) based on RFC ... and RFC ...
67#[inline(always)]
68pub fn is_quotable(ch: char, tp: MailType) -> bool {
69    is_vchar(ch, tp) || is_ws(ch)
70}
71
/// True for any kind of whitespace.
///
/// This simply forwards to `char::is_whitespace`, so it is not
/// limited to the two characters accepted by `is_ws`.
#[inline(always)]
pub fn is_any_whitespace(ch: char) -> bool {
    char::is_whitespace(ch)
}
77
78/// ctext as defined by RFC 5322
79pub fn is_ctext(ch: char, mt: MailType) -> bool {
80    match ch {
81        '!'...'\'' |
82        '*'...'[' |
83        ']'...'~' => true,
84        // obs-ctext
85        _ => mt == MailType::Internationalized && !is_ascii( ch )
86    }
87}
88
/// Check if a char is a special (_based on RFC 5322_).
///
/// Note that this file also has `is_tspecial` and `is_especial`,
/// which check _different_ sets of characters.
pub fn is_special(ch: char) -> bool {
    // all specials: ( ) < > [ ] : ; @ \ , . "
    "()<>[]:;@\\,.\"".contains(ch)
}
104
105
/// Check if a char is a tspecial (based on RFC 2045).
pub fn is_tspecial(ch: char) -> bool {
    // all tspecials: ( ) < > @ , ; : \ " / [ ] ? =
    "()<>@,;:\\\"/[]?=".contains(ch)
}
120
121
122
123/// atext as defined by RFC 5322
124#[inline(always)]
125pub fn is_atext(ch: char, tp: MailType) -> bool {
126    is_vchar(ch, tp) && !is_special(ch)
127}
128
129/// dtext as defined by RFC 5322
130#[inline(always)]
131pub fn is_dtext(ch: char , mt: MailType) -> bool {
132    match ch as u32 {
133        33...90 |
134        94...126 => true,
135        _ => mt == MailType::Internationalized && !is_ascii(ch)
136    }
137}
138
139/// qtext as defined by RFC 5322
140pub fn is_qtext(ch: char, mt: MailType) -> bool {
141    match ch {
142        //not ' ' [d:32]
143        '!' |
144        //not '"' [d:34]
145        '#'...'[' |
146        //not '\\' [d:92]
147        ']'...'~' => true,
148        _ => mt == MailType::Internationalized && !is_ascii(ch)
149    }
150}
151
/// Check if it is a CTL char (based on RFC 822).
///
/// CTL is any ASCII control character (decimal 0 to 31) _and_
/// DEL (decimal 127).
///
/// # Note
/// the standard specifies `'\t'` as a CTL but not `' '`
/// but both `'\t'` and `' '` are LWSP-char i.e. semantically
/// space i.e. _semantically equivalent_.
#[inline(always)]
pub fn is_ctl(ch: char) -> bool {
    let code = ch as u32;
    // DEL is the only CTL outside of the 0..=31 range
    code < 32 || code == 127
}
162
163/// Check if a char is an token char (based on RFC 2045).
164#[inline(always)]
165pub fn is_token_char(ch: char) -> bool {
166    is_ascii(ch) && !is_ctl(ch) && !is_tspecial(ch) && ch != ' '
167}
168
169
//TODO add rfc
/// Check if a char is an especial.
///
/// NOTE(review): presumably these are the `especials` of RFC 2047,
/// as the encoded word token check in this file builds on this
/// function — confirm. Be aware that, unlike `is_tspecial`, this
/// set contains `'.'` but does not contain `'\\'`.
#[inline(always)]
pub fn is_especial(ch: char) -> bool {
    // all especials: ( ) < > @ , ; : " / [ ] ? . =
    "()<>@,;:\"/[]?.=".contains(ch)
}
186
187//TODO add rfc
188/// Check if a string is an token (based on RFC ...).
189pub fn is_token(s: &str) -> bool {
190    0 < s.len() && s.chars().all(is_token_char)
191}
192
193//
194//pub fn is_dot_atom_text( text: &str, mt: MailType ) -> bool {
195//    use nom::IResult;
196//    use self::parse::recognize_dot_atom_text;
197//
198//    let res = tuple!( text,
199//        call!( recognize_dot_atom_text, mt ),
200//        eof!()
201//    );
202//
203//    match res {
204//        IResult::Done(_, _) => true,
205//        _ => false
206//    }
207//}
208
209//pub mod parse {
210//    use nom::IResult;
211//    use super::{ is_atext, MailType };
212//
213//    pub fn recognize_dot_atom_text( input: &str, mt: MailType ) -> IResult<&str, &str> {
214//        recognize!( input, tuple!(
215//            take_while1!( call!( is_atext, mt ) ),
216//            many0!( tuple!(
217//                char!( "." ),
218//                take_while1!( call!( is_atext, mt ) )
219//            ) )
220//        ) )
221//    }
222//
223//}
224//TODO this should be some where else I think
225// (but it is used by `1. codec`, `2. components` )
226/// Grammar parts for encoded words (based on RFC 2047).
227pub mod encoded_word {
228    use nom;
229    use ::MailType;
230    use ::error::{EncodingError, EncodingErrorKind};
231    use super::{  is_especial, is_ascii_vchar };
232
233    /// maximal length of an encoded word
234    pub const MAX_ECW_LEN: usize = 75;
235
236    /// The syntax overhead from "framing" an encoded word.
237    ///
238    /// This is the start (1x`=?`) the first and second separator (2x`?`) and the
239    /// end (1x`?=`) leading to 6 byte overhead.
240    pub const ECW_SEP_OVERHEAD: usize = 6;
241
242    /// Represents the place at which the encoded word appears.
243    ///
244    /// Depending on the place more or less character have to be
245    /// encoded.
246    ///
247    /// Note: Implementations creating encoded words might use a
248    /// stricter context which is compatible with all places to
249    /// reduce code complexity.
250    #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)]
251    pub enum EncodedWordContext {
252        Phrase,
253        Text,
254        Comment
255    }
256
257    impl EncodedWordContext {
258
259        /// Returns a (context dependent) validator to check if a char can be represented without encoding.
260        fn char_validator( &self ) -> fn(char) -> bool {
261            use self::EncodedWordContext::*;
262            match *self {
263                Phrase => valid_char_in_ec_in_phrase,
264                Text => is_encoded_word_char,
265                Comment => valid_char_in_ec_in_comment,
266            }
267        }
268    }
269
270
271    /// Returns true if the given word is a encoded word.
272    ///
273    /// Note that this depends on the context the word appears in and the mail type.
274    /// The reason for this is that encoded words tend to be valid text even without
275    /// decoding them. But this means if the encoded word has some syntax error (e.g.
276    /// missing closing `?=`) it is no longer an encoded word but just some text which
277    /// happen to look similar to one.
278    pub fn is_encoded_word(word: &str, ctx: EncodedWordContext, mail_type: MailType) -> bool {
279        try_parse_encoded_word_parts(word, ctx, mail_type).is_ok()
280    }
281
282    /// Tries to parse the given string as an encoded word.
283    pub fn try_parse_encoded_word_parts(
284        word: &str,
285        ctx: EncodedWordContext,
286        mail_type: MailType
287    ) -> Result<(&str, &str, &str), EncodingError>
288    {
289        let char_validator = ctx.char_validator();
290        // Note we could get a possible speed up by making rustc generate
291        // a different function for each Context, inlining ALL char tests
292        let res = do_parse!(
293            word,
294            char!( '=' ) >>
295            char!( '?' ) >>
296            charset: take_while!( is_ew_token_char ) >>
297            char!( '?' ) >>
298            encoding: take_while!( is_ew_token_char ) >>
299            char!( '?' ) >>
300            text: take_while!( char_validator ) >>
301            char!( '?' ) >>
302            char!( '=' ) >>
303            eof!() >>
304            (charset, encoding, text)
305        );
306
307        match res {
308            nom::IResult::Done( rest, result ) => {
309                assert_eq!(rest.len(), 0, "[BUG] used nom::eof!() but rest.len() > 0");
310                Ok( result )
311            },
312            nom::IResult::Incomplete( .. ) => {
313                return Err((EncodingErrorKind::Malformed, mail_type).into());
314            }
315            nom::IResult::Error( .. ) => {
316                return Err((EncodingErrorKind::Malformed, mail_type).into());
317            }
318        }
319    }
320
321    /// True if the char can appear in an encoded word.
322    fn is_encoded_word_char(ch: char) -> bool {
323        is_ascii_vchar(ch) && ch != '?'
324    }
325
326    /// True if the char can appear in an encoded word appearing in a comment.
327    fn valid_char_in_ec_in_comment(ch: char) -> bool {
328        is_encoded_word_char(ch) && !(ch == '(' || ch == ')' || ch == '"')
329    }
330
331    /// True if the char is valid in an encode word appearing in a phrase.
332    fn valid_char_in_ec_in_phrase(ch: char) -> bool {
333        match ch {
334            '0'...'9' |
335            'a'...'z' |
336            'A'...'Z' |
337            '!' | '*' |
338            '+' | '-' |
339            '/' | '=' |
340            '_' => true,
341            _ => false
342        }
343    }
344
345    /// True if the char is a encoded word token.
346    ///
347    /// Encoded word tokens are used for the charset and
348    /// language part of an encoded word.
349    fn is_ew_token_char(ch: char) -> bool {
350        is_ascii_vchar(ch) && !is_especial(ch)
351    }
352
353}
354
355//TODO shouldn't we use `bind/quoted_string`?
356/// True if the given string is a quoted string.
357pub fn is_quoted_string(qstr: &str, tp: MailType) -> bool {
358    let mut iter = qstr.chars();
359    if let Some('"') = iter.next() {} else { return false }
360    let mut next = iter.next();
361    while let Some(ch) = next {
362        match ch {
363            '\\' => {
364                if let Some(next_char) = iter.next() {
365                    if !(is_vchar(next_char, tp) || is_ws(next_char)) {
366                        return false;
367                    }
368                } else {
369                    return false;
370                }
371            },
372            '"' => {
373                if iter.next().is_none() {
374                    return true;
375                } else {
376                    return false;
377                }
378            }
379            ch => {
380                if !is_qtext(ch, tp) {
381                    return false
382                }
383            }
384        }
385        next = iter.next()
386    }
387
388    // The only true return if we have a '"' followed by iter.next().is_none()
389    return false;
390}
391
392
#[cfg(test)]
mod test {
    use super::*;

    /// DEL and all chars below `' '` are no vchars, `'!'..='~'` are vchars.
    #[test]
    fn _is_ascii_vchar() {
        assert!(!is_ascii_vchar('\x7f'));
        for code in 0u8..32 {
            assert!(!is_ascii_vchar(code as char), "{:?} should not be a VCHAR", code);
        }
        for code in 33u8..127 {
            let good_char = code as char;
            assert!(is_ascii_vchar(good_char), "{:?} should be a VCHAR", good_char);
        }
    }

    /// HTAB is a CTL, space is not.
    #[test]
    fn htap_is_ctl_space_is_not() {
        assert!(is_ctl('\t'));
        assert!(!is_ctl(' '));
    }

    /// The empty string is not a token.
    #[test]
    fn is_toke_empty() {
        assert!(!is_token(""));
    }
}
423