bmail/parse/
mod.rs

1use nom::branch::alt;
2use nom::bytes::complete::tag;
3use nom::bytes::complete::take_while;
4use nom::bytes::complete::take_while1;
5
6use nom::combinator::map;
7use nom::combinator::opt;
8use nom::combinator::recognize;
9use nom::combinator::value;
10use nom::error::Error;
11use nom::error::ErrorKind;
12use nom::error::ParseError;
13use nom::multi::fold_many0;
14
15use nom::multi::many0;
16use nom::multi::many0_count;
17use nom::multi::many1;
18use nom::multi::many1_count;
19use nom::sequence::tuple;
20use nom::Err;
21use nom::IResult;
22
23use crate::{ByteStr, ByteString};
24
25pub mod address;
26pub mod date_time;
27pub mod email;
28pub mod header;
29
/// Returns `true` when `ch` is a blank: an ASCII space or a horizontal tab.
pub(crate) fn is_wsp(ch: u8) -> bool {
    matches!(ch, b' ' | b'\t')
}
33
34/// Recognize folding white space - semantically treated as a space
35pub fn fws(input: &[u8]) -> IResult<&[u8], ()> {
36    let (i, _o) = tuple((
37        opt(tuple((take_while(is_wsp), tag(b"\r\n")))),
38        take_while1(is_wsp),
39    ))(input)?;
40
41    Ok((i, ()))
42}
43
44fn satisfy_byte<F>(cond: F) -> impl Fn(&[u8]) -> IResult<&[u8], u8>
45where
46    F: Fn(u8) -> bool,
47{
48    move |input| {
49        if input.is_empty() {
50            Err(Err::Error(Error::from_error_kind(input, ErrorKind::Eof)))
51        } else {
52            let ch = input[0];
53            if cond(ch) {
54                Ok((&input[1..], input[0]))
55            } else {
56                Err(Err::Error(Error::from_error_kind(
57                    input,
58                    ErrorKind::Satisfy,
59                )))
60            }
61        }
62    }
63}
64
/// Returns `true` for bytes in the inclusive range `0x21..=0x7e`
/// (the visible, non-blank ASCII characters).
fn is_vchar(ch: u8) -> bool {
    (0x21..=0x7e).contains(&ch)
}
68
69fn is_quotable(ch: u8) -> bool {
70    is_vchar(ch) || is_wsp(ch)
71}
72
73pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], u8> {
74    let (i, (_backslash, ch)) = tuple((tag(b"\\"), satisfy_byte(is_quotable)))(input)?;
75    Ok((i, ch))
76}
77
/// Returns `true` for bytes allowed verbatim inside a comment: codes 33-39, 42-91
/// and 93-126, i.e. visible ASCII except `(` (40), `)` (41) and `\` (92).
fn is_ctext(ch: u8) -> bool {
    matches!(ch, 33..=39 | 42..=91 | 93..=126)
}
81
82fn ccontent(input: &[u8]) -> IResult<&[u8], ()> {
83    alt((
84        value((), satisfy_byte(is_ctext)),
85        value((), quoted_pair),
86        comment,
87    ))(input)
88}
89
90fn comment(input: &[u8]) -> IResult<&[u8], ()> {
91    value(
92        (),
93        tuple((
94            tag(b"("),
95            many0_count(tuple((opt(fws), ccontent))),
96            opt(fws),
97            tag(b")"),
98        )),
99    )(input)
100}
101
/// Returns `true` for bytes allowed in an atom: ASCII letters and digits plus the
/// punctuation characters listed in the byte string below.
fn is_atext(ch: u8) -> bool {
    ch.is_ascii_alphanumeric() || b"!#$%&'*+-/=?^_`{|}~".contains(&ch)
}
105
/// Returns `true` when `ch` is one of the "special" delimiter bytes:
/// `( ) < > [ ] : ; @ \ , .` or a double quote.
pub fn is_special(ch: u8) -> bool {
    b"()<>[]:;@\\,.\"".contains(&ch)
}
109
110pub fn atom(input: &[u8]) -> IResult<&[u8], &ByteStr> {
111    map(
112        tuple((opt(cfws), take_while1(is_atext), opt(cfws))),
113        |(_, the_atom, _)| ByteStr::from_slice(the_atom),
114    )(input)
115}
116
117fn dot_atom_text(input: &[u8]) -> IResult<&[u8], &ByteStr> {
118    // dot-atom-text   =   1*atext *("." 1*atext)
119    map(
120        recognize(tuple((
121            take_while1(is_atext),
122            many0_count(tuple((tag(b"."), take_while1(is_atext)))),
123        ))),
124        ByteStr::from_slice,
125    )(input)
126}
127
128pub fn dot_atom(input: &[u8]) -> IResult<&[u8], &ByteStr> {
129    map(
130        tuple((opt(cfws), dot_atom_text, opt(cfws))),
131        |(_, the_atom, _)| the_atom,
132    )(input)
133}
134
135pub fn cfws(input: &[u8]) -> IResult<&[u8], ()> {
136    alt((
137        value(
138            (),
139            tuple((many1_count(tuple((opt(fws), comment))), opt(fws))),
140        ),
141        fws,
142    ))(input)
143}
144
/// Returns `true` for bytes allowed verbatim inside a quoted string: code 33,
/// 35-91 and 93-126, i.e. visible ASCII except `"` (34) and `\` (92).
fn is_qtext(ch: u8) -> bool {
    matches!(ch, 33 | 35..=91 | 93..=126)
}
148
149fn qcontent(input: &[u8]) -> IResult<&[u8], u8> {
150    alt((satisfy_byte(is_qtext), quoted_pair))(input)
151}
152
153// TODO - Cow here when possible, rather than always allocating?
154pub fn quoted_string(input: &[u8]) -> IResult<&[u8], ByteString> {
155    map(
156        tuple((
157            opt(cfws),
158            tag(b"\""),
159            many0(map(tuple((opt(fws), qcontent)), |(_, ch)| ch)),
160            opt(fws),
161            tag(b"\""),
162            opt(cfws),
163        )),
164        |(_, _, s, _, _, _)| ByteString(s),
165    )(input)
166}
167
168// TODO - Cow when possible?
169fn word(input: &[u8]) -> IResult<&[u8], ByteString> {
170    alt((map(atom, ToOwned::to_owned), quoted_string))(input)
171}
172
173// TODO - Cow when possible?
174pub fn phrase(input: &[u8]) -> IResult<&[u8], Vec<ByteString>> {
175    many1(word)(input)
176}
177
/// Smoke test: a two-word phrase must parse without error.
/// The parsed result is only printed, not asserted on.
#[test]
pub fn test_multiword_phrase() {
    use nom::combinator::complete;

    let raw = b"Brennan Vincent";

    let parsed = complete(phrase)(raw).unwrap();
    eprintln!("{:?}", parsed);
}
187
188// TODO - Cow when possible?
189pub fn unstructured(input: &[u8]) -> IResult<&[u8], ByteString> {
190    let (i, o) = fold_many0(
191        tuple((opt(fws), satisfy_byte(is_vchar))),
192        vec![],
193        |mut s, (maybe_fws, ch)| {
194            if let Some(()) = maybe_fws {
195                s.push(b' ');
196            }
197            s.push(ch);
198            s
199        },
200    )(input)?;
201    map(
202        fold_many0(satisfy_byte(is_wsp), o, |mut s, ch| {
203            s.push(ch);
204            s
205        }),
206        ByteString,
207    )(i)
208}
209
#[cfg(test)]
mod tests {
    use super::fws;

    /// Blanks, a CRLF fold and further blanks are consumed as one FWS,
    /// leaving the text that follows untouched.
    #[test]
    fn test_fws() {
        let (remaining, ()) = fws(b"    \r\n   hi!").unwrap();
        assert_eq!(remaining, b"hi!");
    }
}