eml_codec/text/
words.rs

1use crate::text::ascii;
2use crate::text::whitespace::cfws;
3use nom::{
4    bytes::complete::{tag, take_while1},
5    character::is_alphanumeric,
6    combinator::{opt, recognize},
7    multi::many0,
8    sequence::{delimited, pair},
9    IResult,
10};
11
12pub fn is_vchar(c: u8) -> bool {
13    (ascii::EXCLAMATION..=ascii::TILDE).contains(&c)
14}
15
16/// MIME Token allowed characters
17///
18/// forbidden: ()<>@,;:\"/[]?=
19fn is_mime_atom_text(c: u8) -> bool {
20    is_alphanumeric(c)
21        || c == ascii::EXCLAMATION
22        || c == ascii::NUM
23        || c == ascii::DOLLAR
24        || c == ascii::PERCENT
25        || c == ascii::AMPERSAND
26        || c == ascii::SQUOTE
27        || c == ascii::ASTERISK
28        || c == ascii::PLUS
29        || c == ascii::MINUS
30        || c == ascii::PERIOD
31        || c == ascii::CARRET
32        || c == ascii::UNDERSCORE
33        || c == ascii::GRAVE
34        || c == ascii::LEFT_CURLY
35        || c == ascii::PIPE
36        || c == ascii::RIGHT_CURLY
37        || c == ascii::TILDE
38}
39
40/// MIME Token
41///
42/// `[CFWS] 1*token_text [CFWS]`
43pub fn mime_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
44    delimited(opt(cfws), take_while1(is_mime_atom_text), opt(cfws))(input)
45}
46
47/// Atom allowed characters
48///
49/// authorized: !#$%&'*+-/=?^_`{|}~
50fn is_atext(c: u8) -> bool {
51    is_alphanumeric(c)
52        || c == ascii::EXCLAMATION
53        || c == ascii::NUM
54        || c == ascii::DOLLAR
55        || c == ascii::PERCENT
56        || c == ascii::AMPERSAND
57        || c == ascii::SQUOTE
58        || c == ascii::ASTERISK
59        || c == ascii::PLUS
60        || c == ascii::MINUS
61        || c == ascii::SLASH
62        || c == ascii::EQ
63        || c == ascii::QUESTION
64        || c == ascii::CARRET
65        || c == ascii::UNDERSCORE
66        || c == ascii::GRAVE
67        || c == ascii::LEFT_CURLY
68        || c == ascii::PIPE
69        || c == ascii::RIGHT_CURLY
70        || c == ascii::TILDE
71}
72
73/// Atom
74///
75/// `[CFWS] 1*atext [CFWS]`
76pub fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
77    delimited(opt(cfws), take_while1(is_atext), opt(cfws))(input)
78}
79
80/// dot-atom-text
81///
82/// `1*atext *("." 1*atext)`
83pub fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &[u8]> {
84    recognize(pair(
85        take_while1(is_atext),
86        many0(pair(tag("."), take_while1(is_atext))),
87    ))(input)
88}
89
90/// dot-atom
91///
92/// `[CFWS] dot-atom-text [CFWS]`
93#[allow(dead_code)]
94pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
95    delimited(opt(cfws), dot_atom_text, opt(cfws))(input)
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_atext` accepts a symbol, a digit and a letter, and rejects the space.
    #[test]
    fn test_atext() {
        for &accepted in b"=5Q" {
            assert!(is_atext(accepted));
        }
        assert!(!is_atext(b' '));
        // TODO: support UTF-8. `is_atext` works on single bytes, so a check
        // such as `is_atext('É')` cannot be expressed yet.
    }

    /// `atom` drops the comments and whitespace around the first word.
    #[test]
    fn test_atom() {
        let (rest, word) = atom(b"(skip)  imf_codec (hidden) aerogramme").unwrap();
        assert_eq!(word, &b"imf_codec"[..]);
        assert_eq!(rest, &b"aerogramme"[..]);
    }

    /// `dot_atom_text` stops at the first byte that is neither atext nor a dot.
    #[test]
    fn test_dot_atom_text() {
        let (rest, text) = dot_atom_text(b"quentin.dufour.io abcdef").unwrap();
        assert_eq!(text, &b"quentin.dufour.io"[..]);
        assert_eq!(rest, &b" abcdef"[..]);
    }

    /// `dot_atom` also consumes the comments and whitespace around the text.
    #[test]
    fn test_dot_atom() {
        let (rest, text) = dot_atom(b"   (skip) quentin.dufour.io abcdef").unwrap();
        assert_eq!(text, &b"quentin.dufour.io"[..]);
        assert_eq!(rest, &b"abcdef"[..]);
    }
}