eml_codec/text/
whitespace.rs

1use crate::text::ascii;
2use crate::text::encoding::encoded_word;
3use crate::text::quoted::quoted_pair;
4use nom::{
5    branch::alt,
6    bytes::complete::{is_not, tag, take_while1},
7    character::complete::{space0, space1},
8    combinator::{opt, recognize},
9    multi::{many0, many1},
10    sequence::{pair, tuple},
11    IResult,
12};
13
/// Whitespace content (space, new line, tab) and
/// delimited content (e.g. comments, lines, sections, etc.)
16
17/// Obsolete/Compatible CRLF
18///
19/// Theoretically, all lines must end with \r\n
20/// but some mail servers like Dovecot support malformated emails,
21/// for example with only \n eol. It works because
22/// \r or \n is allowed nowhere else, so we also add this support.
23
24pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
25    alt((
26        tag(ascii::CRLF),
27        tag(ascii::CRCRLF),
28        tag(&[ascii::CR]),
29        tag(&[ascii::LF]),
30    ))(input)
31}
32
33/// ```abnf
34/// fold_line = any *(1*(crlf WS) any) crlf
35/// ```
36pub fn foldable_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
37    recognize(tuple((
38        is_not(ascii::CRLF),
39        many0(pair(many1(pair(obs_crlf, space1)), is_not(ascii::CRLF))),
40        obs_crlf,
41    )))(input)
42}
43
44// --- whitespaces and comments
45
46// Note: WSP = SP / HTAB = %x20 / %x09
47// nom::*::space0 = *WSP
48// nom::*::space1 = 1*WSP
49
50/// Permissive foldable white space
51///
52/// Folding white space are used for long headers splitted on multiple lines.
53/// The obsolete syntax allowes multiple lines without content; implemented for compatibility
54/// reasons
55pub fn fws(input: &[u8]) -> IResult<&[u8], u8> {
56    let (input, _) = alt((recognize(many1(fold_marker)), space1))(input)?;
57    Ok((input, ascii::SP))
58}
59fn fold_marker(input: &[u8]) -> IResult<&[u8], &[u8]> {
60    let (input, _) = space0(input)?;
61    let (input, _) = obs_crlf(input)?;
62    space1(input)
63}
64
65/// Folding White Space with Comment
66///
67/// Note: we drop the comments for now...  
68///
69/// ```abnf
70///   ctext           =   %d33-39 /          ; Printable US-ASCII
71///                       %d42-91 /          ;  characters not including
72///                       %d93-126 /         ;  "(", ")", or "\"
73///                       obs-ctext
74///
75///   ccontent        =   ctext / quoted-pair / comment
76///
77///   comment         =   "(" *([FWS] ccontent) [FWS] ")"
78///
79///   CFWS            =   (1*([FWS] comment) [FWS]) / FWS
80/// ```
81pub fn cfws(input: &[u8]) -> IResult<&[u8], &[u8]> {
82    alt((recognize(comments), recognize(fws)))(input)
83}
84
85pub fn comments(input: &[u8]) -> IResult<&[u8], ()> {
86    let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
87    let (input, _) = opt(fws)(input)?;
88    Ok((input, ()))
89}
90
91pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
92    let (input, _) = tag("(")(input)?;
93    let (input, _) = many0(tuple((opt(fws), ccontent)))(input)?;
94    let (input, _) = opt(fws)(input)?;
95    let (input, _) = tag(")")(input)?;
96    Ok((input, ()))
97}
98
99pub fn ccontent(input: &[u8]) -> IResult<&[u8], &[u8]> {
100    alt((
101        ctext,
102        recognize(quoted_pair),
103        recognize(encoded_word),
104        recognize(comment),
105    ))(input)
106}
107
108pub fn ctext(input: &[u8]) -> IResult<&[u8], &[u8]> {
109    take_while1(is_ctext)(input)
110}
111
112pub fn is_ctext(c: u8) -> bool {
113    is_restr_ctext(c) || is_obs_no_ws_ctl(c)
114}
115
116/// Check if it's a comment text character
117///
118/// ```abnf
119///   ctext           =   %d33-39 /          ; Printable US-ASCII
120///                       %d42-91 /          ;  characters not including
121///                       %d93-126 /         ;  "(", ")", or "\"
122///                       obs-ctext
123///```
124pub fn is_restr_ctext(c: u8) -> bool {
125    (ascii::EXCLAMATION..=ascii::SQUOTE).contains(&c)
126        || (ascii::ASTERISK..=ascii::LEFT_BRACKET).contains(&c)
127        || (ascii::RIGHT_BRACKET..=ascii::TILDE).contains(&c)
128}
129
130/// US ASCII control characters without effect
131///
132/// ```abnf
133///   obs-NO-WS-CTL   =   %d1-8 /            ; US-ASCII control
134///                       %d11 /             ;  characters that do not
135///                       %d12 /             ;  include the carriage
136///                       %d14-31 /          ;  return, line feed, and
137///                       %d127              ;  white space characters
138/// ```
139pub fn is_obs_no_ws_ctl(c: u8) -> bool {
140    (ascii::SOH..=ascii::BS).contains(&c)
141        || c == ascii::VT
142        || c == ascii::FF
143        || (ascii::SO..=ascii::US).contains(&c)
144        || c == ascii::DEL
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    /// Remainder shared by the line-ending and folding cases.
    const WORLD: &[u8] = b"world";

    #[test]
    fn test_obs_crlf() {
        // (input, expected line ending); the remainder is always "world".
        let cases = [
            (&b"\rworld"[..], &b"\r"[..]),
            (&b"\r\nworld"[..], &b"\r\n"[..]),
            (&b"\nworld"[..], &b"\n"[..]),
        ];
        for &(raw, eol) in cases.iter() {
            assert_eq!(obs_crlf(raw), Ok((WORLD, eol)));
        }
    }

    #[test]
    fn test_fws() {
        // Every accepted form collapses to a single space before "world".
        let accepted = [
            &b"\r\n world"[..],
            &b" \r\n \r\n world"[..],
            &b" world"[..],
        ];
        for &raw in accepted.iter() {
            assert_eq!(fws(raw), Ok((WORLD, ascii::SP)));
        }

        // A line ending that is not followed by whitespace is not a fold.
        assert!(fws(b"\r\nFrom: test").is_err());
    }

    #[test]
    fn test_cfws() {
        // (input, expected remainder, expected recognized prefix)
        let cases = [
            (
                &b"(A nice \\) chap) <pete(his account)@silly.test(his host)>"[..],
                &b"<pete(his account)@silly.test(his host)>"[..],
                &b"(A nice \\) chap) "[..],
            ),
            (
                &b"(Chris's host.)public.example>,"[..],
                &b"public.example>,"[..],
                &b"(Chris's host.)"[..],
            ),
            (
                &b"(double (comment) is fun) wouch"[..],
                &b"wouch"[..],
                &b"(double (comment) is fun) "[..],
            ),
        ];
        for &(raw, rest, consumed) in cases.iter() {
            assert_eq!(cfws(raw), Ok((rest, consumed)));
        }
    }

    #[test]
    fn test_cfws_encoded_word() {
        let raw = &b"(=?US-ASCII?Q?Keith_Moore?=)"[..];
        // The whole comment is consumed and nothing is left over.
        assert_eq!(cfws(raw), Ok((&b""[..], raw)));
    }
}