imap_proto/parser/
core.rs

1use nom::{
2    branch::alt,
3    bytes::streaming::{escaped, tag, tag_no_case, take, take_while, take_while1},
4    character::streaming::{char, digit1, one_of},
5    combinator::{map, map_res},
6    multi::{separated_list0, separated_list1},
7    sequence::{delimited, tuple},
8    IResult,
9};
10
11use std::str::{from_utf8, FromStr};
12
// ----- number -----
14
15// number          = 1*DIGIT
16//                    ; Unsigned 32-bit integer
17//                    ; (0 <= n < 4,294,967,296)
18pub fn number(i: &[u8]) -> IResult<&[u8], u32> {
19    let (i, bytes) = digit1(i)?;
20    match from_utf8(bytes).ok().and_then(|s| u32::from_str(s).ok()) {
21        Some(v) => Ok((i, v)),
22        None => Err(nom::Err::Error(nom::error::make_error(
23            i,
24            nom::error::ErrorKind::MapRes,
25        ))),
26    }
27}
28
29// same as `number` but 64-bit
30pub fn number_64(i: &[u8]) -> IResult<&[u8], u64> {
31    let (i, bytes) = digit1(i)?;
32    match from_utf8(bytes).ok().and_then(|s| u64::from_str(s).ok()) {
33        Some(v) => Ok((i, v)),
34        None => Err(nom::Err::Error(nom::error::make_error(
35            i,
36            nom::error::ErrorKind::MapRes,
37        ))),
38    }
39}
40
41// seq-range       = seq-number ":" seq-number
42//                    ; two seq-number values and all values between
43//                    ; these two regardless of order.
44//                    ; seq-number is a nz-number
45pub fn sequence_range(i: &[u8]) -> IResult<&[u8], std::ops::RangeInclusive<u32>> {
46    map(tuple((number, tag(":"), number)), |(s, _, e)| s..=e)(i)
47}
48
49// sequence-set    = (seq-number / seq-range) *("," sequence-set)
50//                     ; set of seq-number values, regardless of order.
51//                     ; Servers MAY coalesce overlaps and/or execute the
52//                     ; sequence in any order.
53pub fn sequence_set(i: &[u8]) -> IResult<&[u8], Vec<std::ops::RangeInclusive<u32>>> {
54    separated_list1(tag(","), alt((sequence_range, map(number, |n| n..=n))))(i)
55}
56
// ----- string -----
58
59// string = quoted / literal
60pub fn string(i: &[u8]) -> IResult<&[u8], &[u8]> {
61    alt((quoted, literal))(i)
62}
63
64// string bytes as utf8
65pub fn string_utf8(i: &[u8]) -> IResult<&[u8], &str> {
66    map_res(string, from_utf8)(i)
67}
68
69// quoted = DQUOTE *QUOTED-CHAR DQUOTE
70pub fn quoted(i: &[u8]) -> IResult<&[u8], &[u8]> {
71    delimited(
72        char('"'),
73        escaped(
74            take_while1(|byte| is_text_char(byte) && !is_quoted_specials(byte)),
75            '\\',
76            one_of("\\\""),
77        ),
78        char('"'),
79    )(i)
80}
81
82// quoted bytes as utf8
83pub fn quoted_utf8(i: &[u8]) -> IResult<&[u8], &str> {
84    map_res(quoted, from_utf8)(i)
85}
86
/// Returns `true` if `c` is one of the IMAP `quoted-specials`.
///
/// ```text
/// quoted-specials = DQUOTE / "\"
/// ```
pub fn is_quoted_specials(c: u8) -> bool {
    matches!(c, b'"' | b'\\')
}
91
92/// literal = "{" number "}" CRLF *CHAR8
93///            ; Number represents the number of CHAR8s
94pub fn literal(input: &[u8]) -> IResult<&[u8], &[u8]> {
95    let mut parser = tuple((tag(b"{"), number, tag(b"}"), tag("\r\n")));
96
97    let (remaining, (_, count, _, _)) = parser(input)?;
98
99    let (remaining, data) = take(count)(remaining)?;
100
101    if !data.iter().all(|byte| is_char8(*byte)) {
102        // FIXME: what ErrorKind should this have?
103        return Err(nom::Err::Error(nom::error::Error::new(
104            remaining,
105            nom::error::ErrorKind::Verify,
106        )));
107    }
108
109    Ok((remaining, data))
110}
111
/// Returns `true` if `i` is a CHAR8, i.e. any octet other than NUL.
///
/// ```text
/// CHAR8 = %x01-ff ; any OCTET except NUL, %x00
/// ```
pub fn is_char8(i: u8) -> bool {
    matches!(i, 0x01..=0xFF)
}
116
// ----- astring ----- atom (roughly) or string
118
119// astring = 1*ASTRING-CHAR / string
120pub fn astring(i: &[u8]) -> IResult<&[u8], &[u8]> {
121    alt((take_while1(is_astring_char), string))(i)
122}
123
124// astring bytes as utf8
125pub fn astring_utf8(i: &[u8]) -> IResult<&[u8], &str> {
126    map_res(astring, from_utf8)(i)
127}
128
129// ASTRING-CHAR = ATOM-CHAR / resp-specials
130pub fn is_astring_char(c: u8) -> bool {
131    is_atom_char(c) || is_resp_specials(c)
132}
133
134// ATOM-CHAR = <any CHAR except atom-specials>
135pub fn is_atom_char(c: u8) -> bool {
136    is_char(c) && !is_atom_specials(c)
137}
138
139// atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / quoted-specials / resp-specials
140pub fn is_atom_specials(c: u8) -> bool {
141    c == b'('
142        || c == b')'
143        || c == b'{'
144        || c == b' '
145        || c < 32
146        || is_list_wildcards(c)
147        || is_quoted_specials(c)
148        || is_resp_specials(c)
149}
150
/// Returns `true` if `c` is the single IMAP `resp-specials` byte.
///
/// ```text
/// resp-specials = "]"
/// ```
pub fn is_resp_specials(c: u8) -> bool {
    matches!(c, b']')
}
155
156// atom = 1*ATOM-CHAR
157pub fn atom(i: &[u8]) -> IResult<&[u8], &str> {
158    map_res(take_while1(is_atom_char), from_utf8)(i)
159}
160
// ----- nstring ----- nil or string
162
163// nstring = string / nil
164pub fn nstring(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
165    alt((map(nil, |_| None), map(string, Some)))(i)
166}
167
168// nstring bytes as utf8
169pub fn nstring_utf8(i: &[u8]) -> IResult<&[u8], Option<&str>> {
170    alt((map(nil, |_| None), map(string_utf8, Some)))(i)
171}
172
173// nil = "NIL"
174pub fn nil(i: &[u8]) -> IResult<&[u8], &[u8]> {
175    tag_no_case("NIL")(i)
176}
177
// ----- text -----
179
180// text = 1*TEXT-CHAR
181pub fn text(i: &[u8]) -> IResult<&[u8], &str> {
182    map_res(take_while(is_text_char), from_utf8)(i)
183}
184
185// TEXT-CHAR = <any CHAR except CR and LF>
186pub fn is_text_char(c: u8) -> bool {
187    is_char(c) && c != b'\r' && c != b'\n'
188}
189
/// Returns `true` if `c` is a CHAR as defined by RFC 5234.
///
/// ```text
/// CHAR = %x01-7F
///          ; any 7-bit US-ASCII character,
///          ;  excluding NUL
/// ```
pub fn is_char(c: u8) -> bool {
    (0x01..=0x7F).contains(&c)
}
197
// ----- others -----
199
/// Returns `true` if `c` is one of the IMAP `list-wildcards`.
///
/// ```text
/// list-wildcards = "%" / "*"
/// ```
pub fn is_list_wildcards(c: u8) -> bool {
    matches!(c, b'%' | b'*')
}
204
205pub fn paren_delimited<'a, F, O, E>(f: F) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>
206where
207    F: FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>,
208    E: nom::error::ParseError<&'a [u8]>,
209{
210    delimited(char('('), f, char(')'))
211}
212
213pub fn parenthesized_nonempty_list<'a, F, O, E>(
214    f: F,
215) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<O>, E>
216where
217    F: FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>,
218    E: nom::error::ParseError<&'a [u8]>,
219{
220    delimited(char('('), separated_list1(char(' '), f), char(')'))
221}
222
223pub fn parenthesized_list<'a, F, O, E>(f: F) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Vec<O>, E>
224where
225    F: FnMut(&'a [u8]) -> IResult<&'a [u8], O, E>,
226    E: nom::error::ParseError<&'a [u8]>,
227{
228    delimited(char('('), separated_list0(char(' '), f), char(')'))
229}
230
231pub fn opt_opt<'a, F, O, E>(mut f: F) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], Option<O>, E>
232where
233    F: FnMut(&'a [u8]) -> IResult<&'a [u8], Option<O>, E>,
234{
235    move |i: &[u8]| match f(i) {
236        Ok((i, o)) => Ok((i, o)),
237        Err(nom::Err::Error(_)) => Ok((i, None)),
238        Err(e) => Err(e),
239    }
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use assert_matches::assert_matches;

    /// `quoted`: plain content, accepted and rejected escapes, raw (still escaped)
    /// output, and the `Incomplete` / `Error` outcomes.
    #[test]
    fn test_quoted() {
        let (rem, val) = quoted(br#""Hello"???"#).unwrap();
        assert_eq!(val, b"Hello");
        assert_eq!(rem, b"???");

        // Allowed escapes...
        let allowed: [&[u8]; 2] = [br#""Hello \" "???"#, br#""Hello \\ "???"#];
        for &input in allowed.iter() {
            assert!(quoted(input).is_ok());
        }

        // Not allowed escapes...
        let rejected: [&[u8]; 3] = [
            br#""Hello \a "???"#,
            br#""Hello \z "???"#,
            br#""Hello \? "???"#,
        ];
        for &input in rejected.iter() {
            assert!(quoted(input).is_err());
        }

        let (rem, val) = quoted(br#""Hello \"World\""???"#).unwrap();
        assert_eq!(rem, br#"???"#);
        // Open question: should the result keep the escapes (Hello \"World\") ...
        assert_eq!(val, br#"Hello \"World\""#);
        // ... or be unescaped (Hello "World")? Asserting the unescaped form fails today.

        // Test Incomplete
        assert_matches!(quoted(br#""#), Err(nom::Err::Incomplete(_)));
        assert_matches!(quoted(br#""\"#), Err(nom::Err::Incomplete(_)));
        assert_matches!(quoted(br#""Hello "#), Err(nom::Err::Incomplete(_)));

        // Test Error
        assert_matches!(quoted(br"\"), Err(nom::Err::Error(_)));
    }

    /// `string` accepts the literal form and returns exactly the announced payload.
    #[test]
    fn test_string_literal() {
        let value = match string(b"{3}\r\nXYZ") {
            Ok((_, value)) => value,
            rsp => panic!("unexpected response {rsp:?}"),
        };
        assert_eq!(value, b"XYZ");
    }

    /// `astring` stops a bare run at the first non-ASTRING-CHAR byte.
    #[test]
    fn test_astring() {
        let value = match astring(b"text ") {
            Ok((_, value)) => value,
            rsp => panic!("unexpected response {rsp:?}"),
        };
        assert_eq!(value, b"text");
    }

    /// `sequence_range` yields an inclusive range covering both endpoints.
    #[test]
    fn test_sequence_range() {
        let value = match sequence_range(b"23:28 ") {
            Ok((_, value)) => value,
            rsp => panic!("Unexpected response {rsp:?}"),
        };
        assert_eq!(*value.start(), 23);
        assert_eq!(*value.end(), 28);
        assert_eq!(value.collect::<Vec<u32>>(), vec![23, 24, 25, 26, 27, 28]);
    }

    /// `sequence_set` mixes single numbers (as `n..=n`) and ranges, in input order.
    #[test]
    fn test_sequence_set() {
        let value = match sequence_set(b"1,2:8,10,15:30 ") {
            Ok((_, value)) => value,
            rsp => panic!("Unexpected response {rsp:?}"),
        };
        assert_eq!(value.len(), 4);
        assert_eq!(value, vec![1..=1, 2..=8, 10..=10, 15..=30]);
    }
}