daaki-imap 0.2.0

An IMAP4rev1/IMAP4rev2 async client library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
//! Core IMAP grammar primitives (RFC 3501 Section 9 / RFC 9051 Section 9).

#[allow(clippy::wildcard_imports)]
use super::*;

/// Match the two-byte CRLF line terminator (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// On success the matched `\r\n` bytes are returned together with the
/// remaining input.
pub(super) fn crlf(input: &[u8]) -> IResult<&[u8], &[u8]> {
    const LINE_END: &[u8] = b"\r\n";
    tag(LINE_END)(input)
}

/// Match exactly one SP (0x20) byte (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// The parsed value is always `b' '`; it is returned as a byte rather than
/// as the `char` produced by the underlying nom parser.
pub(super) fn sp(input: &[u8]) -> IResult<&[u8], u8> {
    let (rest, _) = nom::character::complete::char(' ')(input)?;
    Ok((rest, b' '))
}

/// Consume one whole `(...)` group, including any groups nested inside it.
///
/// Used to skip LIST-EXTENDED data (RFC 5258) like OLDNAME or CHILDINFO.
///
/// The opening `(` and the closing `)` are both required; everything between
/// them is discarded by [`skip_balanced_parens`].
pub(super) fn skip_parenthesized_block(input: &[u8]) -> IResult<&[u8], ()> {
    char('(')(input)
        .and_then(|(inner, _)| skip_balanced_parens(inner))
        .and_then(|(closing, ())| char(')')(closing))
        .map(|(rest, _)| (rest, ()))
}

/// Skip bytes until the next unmatched `)`, handling nested parentheses.
///
/// Used to skip extension data that may contain nested parenthesized values,
/// e.g. in NAMESPACE descriptors (RFC 2342) or LIST-EXTENDED data (RFC 5258).
/// Quoted strings and literal strings within the skipped region are handled
/// so that parentheses inside them do not affect depth tracking.
///
/// Handles:
/// - Nested `(...)` groups with balanced depth tracking
/// - Quoted strings `"..."` with `\` escape handling
/// - Literal strings `{n}\r\n<n bytes>` (RFC 3501 Section 9)
/// - Literal+ `{n+}\r\n<n bytes>` (RFC 7888)
/// - Literal8 `~{n}\r\n<n bytes>` (RFC 6855 Section 4)
///
/// # Returns
///
/// - `Ok((rest, ()))` where `rest` starts at the unmatched `)` (which is NOT
///   consumed), or is empty when the input ran out with no group left open.
/// - `Ok((rest, ()))` early when a literal announces more bytes than are
///   available; `rest` then starts just after the literal's `{`.
/// - `Err(nom::Err::Error(..))` with `ErrorKind::Char` when the input ran out
///   while at least one `(` was still open.
pub(super) fn skip_balanced_parens(mut input: &[u8]) -> IResult<&[u8], ()> {
    // Number of `(` seen inside the skipped region that are not yet closed.
    let mut depth: u32 = 0;
    loop {
        if input.is_empty() {
            // If we've consumed all input while parentheses are still open,
            // report a parse error (`nom::Err::Error`, not `Incomplete`).
            // Returning Ok here would silently consume bytes from subsequent
            // responses (RFC 3501 Section 7.4.2).
            return if depth == 0 {
                Ok((input, ()))
            } else {
                Err(nom::Err::Error(nom::error::Error::new(
                    input,
                    nom::error::ErrorKind::Char,
                )))
            };
        }
        // An unmatched `)` ends the skipped region; leave it for the caller.
        if depth == 0 && input[0] == b')' {
            return Ok((input, ()));
        }
        match input[0] {
            b'(' => {
                depth += 1;
                input = &input[1..];
            }
            b')' if depth > 0 => {
                depth -= 1;
                input = &input[1..];
            }
            b'"' => {
                // Skip quoted string contents (may contain parens).
                input = &input[1..];
                while !input.is_empty() && input[0] != b'"' {
                    if input[0] == b'\\' && input.len() > 1 {
                        // Guard against escapes consuming CR/LF:
                        // RFC 3501 Section 9: QUOTED-CHAR excludes CR and LF.
                        // A backslash before CR/LF is malformed, so stop
                        // treating the bytes as quoted-string content.
                        // NOTE(review): after this `break` the byte dropped
                        // below as the "closing quote" is the backslash, and
                        // the CR/LF are then consumed one at a time by the
                        // catch-all arm of the outer loop, so scanning does
                        // not actually stop at the CRLF. Confirm whether that
                        // is intended.
                        if input[1] == b'\r' || input[1] == b'\n' {
                            break;
                        }
                        input = &input[2..]; // skip escaped char
                    } else {
                        input = &input[1..];
                    }
                }
                if !input.is_empty() {
                    input = &input[1..]; // skip closing quote
                }
            }
            // Handle literal8 prefix: ~{n}\r\n<n bytes> (RFC 6855 Section 4)
            b'~' if input.len() > 1 && input[1] == b'{' => {
                input = &input[1..]; // skip '~', fall through to '{' on next iteration
            }
            b'{' => {
                // Handle literal: {n}\r\n<n bytes> (RFC 3501 Section 9)
                // and literal+: {n+}\r\n<n bytes> (RFC 7888)
                input = &input[1..]; // skip '{'
                // Extract the count digits. `start`/`end` index into `input`,
                // which now begins right after the '{'.
                let start = 0;
                let mut end = start;
                while end < input.len() && input[end].is_ascii_digit() {
                    end += 1;
                }
                // Require at least one digit and at least one byte after the digits.
                if end > start && end < input.len() {
                    let count_end = end;
                    // Skip optional '+' for LITERAL+ (RFC 7888)
                    if input[end] == b'+' {
                        end += 1;
                    }
                    if end < input.len() && input[end] == b'}' {
                        end += 1; // skip '}'
                        // Skip CRLF after '}'
                        if end + 1 < input.len() && input[end] == b'\r' && input[end + 1] == b'\n' {
                            end += 2;
                            // Parse the byte count and skip that many bytes.
                            // Use checked_add to prevent wrapping on crafted counts
                            // near usize::MAX (RFC 3501 Section 9 / RFC 9051 Section 9).
                            if let Ok(s) = std::str::from_utf8(&input[start..count_end]) {
                                if let Ok(count) = s.parse::<usize>() {
                                    match end.checked_add(count) {
                                        Some(new_end) if new_end <= input.len() => {
                                            // Jump over header and body in one step.
                                            input = &input[new_end..];
                                        }
                                        _ => {
                                            // Literal body exceeds available data or
                                            // overflows usize — stop scanning to avoid
                                            // misinterpreting literal body bytes as
                                            // parenthesized structure
                                            // (RFC 3501 Section 9 / RFC 9051 Section 9).
                                            // NOTE(review): this returns Ok regardless
                                            // of `depth`, with `input` positioned just
                                            // after the '{'.
                                            return Ok((input, ()));
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                // If literal parsing failed, just continue (already past '{')
            }
            _ => {
                // Any other byte is skipped as-is.
                input = &input[1..];
            }
        }
    }
}

/// Parse an `atom`: one or more ATOM-CHAR bytes
/// (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Which bytes count as ATOM-CHAR is decided by [`is_atom_char`]. Fails when
/// the first byte is not an ATOM-CHAR.
pub(super) fn atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let atom_run = take_while1(is_atom_char);
    atom_run(input)
}

/// Parse an `objectid` (RFC 8474 Section 7).
///
/// The formal grammar is `objectid = 1*255(ALPHA / DIGIT / "_" / "-")`.
///
/// This parser is deliberately looser than the grammar: it accepts whatever
/// the generic [`atom`] parser accepts, so servers that emit characters
/// outside the restricted set, or identifiers longer than 255 characters,
/// still interoperate (Postel's law, RFC 1122 Section 1.2.2). Cutting an
/// oversized value short would leave its tail in the input stream and break
/// whatever is parsed next.
pub(super) fn objectid(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let (rest, id) = atom(input)?;
    Ok((rest, id))
}

/// Parse the name part of a FETCH attribute.
///
/// Works like [`atom`], except that `[` also ends the name, so that
/// `BODY[section]` yields the name `BODY` and leaves `[section]` in the
/// remaining input.
pub(super) fn fetch_attr_atom(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let attr_name = take_while1(is_atom_char_no_bracket);
    attr_name(input)
}

/// Parse a command tag (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Grammar: `tag = 1*<any ASTRING-CHAR except "+">`. Because
/// `ASTRING-CHAR = ATOM-CHAR / resp-specials`, a tag may contain `]` but
/// never `+`.
pub(super) fn tag_str(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let tag_run = take_while1(is_tag_char);
    tag_run(input)
}

/// Report whether `b` may appear in a tag (RFC 3501 Section 9).
///
/// `tag = 1*<any ASTRING-CHAR except "+">` with
/// `ASTRING-CHAR = ATOM-CHAR / resp-specials` and `resp-specials = "]"`.
/// So every ATOM-CHAR other than `+` is accepted, and so is `]`.
fn is_tag_char(b: u8) -> bool {
    match b {
        // Explicitly carved out of the tag alphabet.
        b'+' => false,
        // resp-specials: not an ATOM-CHAR, but allowed in ASTRING-CHAR.
        b']' => true,
        other => is_atom_char(other),
    }
}

/// Report whether `b` is an ATOM-CHAR (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// An ATOM-CHAR is any CHAR that is not one of the atom-specials:
///
/// `atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / quoted-specials / resp-specials`
/// `list-wildcards = "%" / "*"`
/// `quoted-specials = DQUOTE / "\"`
/// `resp-specials = "]"`
///
/// Note: `[` is NOT an atom-special per the RFC grammar. It is valid in atoms.
///
/// The RFC limits CHAR to %x01-7f. Bytes 0x80-0xFF are nevertheless accepted
/// on purpose, for compatibility with non-conformant servers that put
/// non-ASCII bytes in atoms (Postel's law).
pub(super) fn is_atom_char(b: u8) -> bool {
    !matches!(
        b,
        // CTL: the C0 control range and DEL.
        0x00..=0x1F | 0x7F
        // SP and the structural delimiters.
        | b' ' | b'(' | b')' | b'{'
        // list-wildcards.
        | b'%' | b'*'
        // quoted-specials.
        | b'"' | b'\\'
        // resp-specials.
        | b']'
    )
}

/// Variant of [`is_atom_char`] that additionally rejects `[`.
///
/// Needed where `[` is a delimiter rather than part of the name, namely in
/// FETCH response attribute names such as `BODY[section]` or
/// `BINARY[section]`: the name must end before the `[`.
pub(super) fn is_atom_char_no_bracket(b: u8) -> bool {
    if b == b'[' {
        return false;
    }
    is_atom_char(b)
}

/// Parse a quoted string (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Handles non-ASCII bytes 0x01-0xFF per real-world server behavior.
/// Returns the unescaped content.
pub(super) fn quoted_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    let (input, _) = char('"')(input)?;
    let mut result = Vec::new();
    let mut i = input;
    loop {
        if i.is_empty() {
            // Complete-mode: all input available — unterminated quote is an error,
            // not "need more data" (RFC 3501 Section 9).
            return Err(nom::Err::Error(nom::error::Error::new(
                i,
                nom::error::ErrorKind::Char,
            )));
        }
        match i[0] {
            b'"' => {
                return Ok((&i[1..], result));
            }
            b'\\' => {
                // Escaped character: \" or \\ per RFC 3501 Section 9.
                if i.len() < 2 {
                    // Complete-mode: truncated escape is an error (RFC 3501 Section 9).
                    return Err(nom::Err::Error(nom::error::Error::new(
                        i,
                        nom::error::ErrorKind::Char,
                    )));
                }
                let escaped = i[1];
                // RFC 3501 Section 9: NUL (%x00) MUST NOT be used at any time.
                // QUOTED-CHAR also excludes CR and LF.
                // A backslash before NUL/CR/LF is malformed — reject it so
                // the escape cannot smuggle a NUL or skip past the
                // response-terminating CRLF.
                if escaped == 0 || escaped == b'\r' || escaped == b'\n' {
                    return Err(nom::Err::Error(nom::error::Error::new(
                        i,
                        nom::error::ErrorKind::Char,
                    )));
                }
                if escaped != b'"' && escaped != b'\\' {
                    // RFC 3501 Section 9: only \" and \\ are valid quoted-specials.
                    // QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
                    //               "\" quoted-specials
                    // A backslash followed by a non-quoted-special is malformed.
                    // Preserve the backslash as literal data per Postel's law —
                    // the sender clearly intended to include it.
                    tracing::debug!(
                        escaped_byte = escaped,
                        "non-standard quoted-string escape: preserving backslash as literal data"
                    );
                    result.push(b'\\');
                }
                result.push(escaped);
                i = &i[2..];
            }
            0 => {
                // NUL not allowed in quoted strings
                return Err(nom::Err::Error(nom::error::Error::new(
                    i,
                    nom::error::ErrorKind::Char,
                )));
            }
            b => {
                // RFC 3501: QUOTED-CHAR = any TEXT-CHAR except quoted-specials
                // TEXT-CHAR = any CHAR except CR and LF
                // Real servers send 0x80-0xFF (non-ASCII) — accept them.
                if b == b'\r' || b == b'\n' {
                    return Err(nom::Err::Error(nom::error::Error::new(
                        i,
                        nom::error::ErrorKind::Char,
                    )));
                }
                result.push(b);
                i = &i[1..];
            }
        }
    }
}

/// Parse a literal: `{count}\r\n<bytes>` (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Also handles LITERAL+ non-synchronizing literals `{count+}` (RFC 7888 Section 4)
/// and literal8 `~{count}\r\n<bytes>` (RFC 6855 Section 4).
pub(super) fn literal(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    // Optional '~' prefix for literal8 (RFC 6855 Section 4)
    let (input, _is_literal8) = opt(char('~'))(input)?;
    let (input, _) = char('{')(input)?;
    let (input, count_bytes) = digit1(input)?;
    let count_str = std::str::from_utf8(count_bytes).map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    // Parse as u64 first to avoid truncation on 32-bit platforms where usize
    // is 32 bits (RFC 9051 Section 9: literal uses number64, 0..2^63-1).
    let count_u64: u64 = count_str.parse().map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    if count_u64 > i64::MAX as u64 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    // Convert to usize for take(). On 32-bit platforms, counts > 4GB are
    // rejected here rather than silently truncating.
    let count: usize = usize::try_from(count_u64).map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
    })?;
    // RFC 9051 Section 9: literal8 = "~{" number64 "}" — no `["+"]`.
    // RFC 7888 Section 4: LITERAL+ `{n+}` is for regular literals only.
    // Per Postel's law, we tolerate `~{n+}` from non-conformant servers
    // since the `+` has no semantic impact on server-to-client data.
    let (input, _has_plus) = opt(char('+'))(input)?;
    let (input, _) = char('}')(input)?;
    let (input, _) = crlf(input)?;
    let (input, data) = take(count)(input)?;
    Ok((input, data.to_vec()))
}

/// Parse a `string`: quoted string or literal (RFC 3501 Section 9).
pub(super) fn string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    alt((quoted_string, literal))(input)
}

/// Parse an `astring`: atom or string (RFC 3501 Section 9).
///
/// Used for mailbox names and other contexts where atoms are accepted.
pub(super) fn astring(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
    alt((
        // Also accept resp-specials ']' and atom-specials in astring-char context
        map(astring_chars, |s: &[u8]| s.to_vec()),
        string,
    ))(input)
}

/// Parse `1*ASTRING-CHAR` (RFC 3501 Section 9).
///
/// ASTRING-CHAR is every ATOM-CHAR plus the resp-special `]`.
fn astring_chars(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let astring_run = take_while1(|byte: u8| byte == b']' || is_atom_char(byte));
    astring_run(input)
}

/// Match a standalone, case-insensitive `NIL` token (RFC 3501 Section 9).
///
/// A bare `tag_no_case(b"NIL")` would also match the first three bytes of a
/// longer atom such as "NILSIMSA" and corrupt the parse. To prevent that, the
/// match only succeeds when `NIL` is followed by end-of-input or by a byte
/// that cannot continue an atom. The following byte is only inspected, never
/// consumed.
///
/// RFC 3501 Section 9 ABNF:
///   atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
///                   quoted-specials / resp-specials
/// where list-wildcards = "%" / "*", quoted-specials = DQUOTE / "\",
/// resp-specials = "]".
pub(super) fn nil_token(input: &[u8]) -> IResult<&[u8], &[u8]> {
    // The atom-specials are exactly the bytes for which `is_atom_char` is false.
    let atom_special = verify(take(1u8), |next: &[u8]| !is_atom_char(next[0]));
    // A token boundary is either such a byte or the end of the input.
    let boundary = alt((value((), atom_special), value((), eof)));
    let mut standalone_nil = terminated(tag_no_case(b"NIL"), peek(boundary));
    standalone_nil(input)
}

/// Parse an `nstring`: NIL or string (RFC 3501 Section 9 / RFC 9051 Section 9).
pub(super) fn nstring(input: &[u8]) -> IResult<&[u8], Option<Vec<u8>>> {
    alt((value(None, nil_token), map(string, Some)))(input)
}

/// Parse a number: 1*DIGIT (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Returns u32. Errors gracefully on overflow.
pub(super) fn number(input: &[u8]) -> IResult<&[u8], u32> {
    let (input, digits) = digit1(input)?;
    let s = std::str::from_utf8(digits).map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    let n: u32 = s.parse().map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    Ok((input, n))
}

/// Parse a non-zero number: `nz-number = digit-nz *DIGIT` (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// Used for message sequence numbers, UIDs, and ESEARCH MIN/MAX values
/// where zero is never valid.
pub(super) fn nz_number(input: &[u8]) -> IResult<&[u8], u32> {
    // RFC 3501 Section 9: nz-number = digit-nz *DIGIT
    // digit-nz = %x31-39 ; 1-9 — leading '0' is never valid.
    if input.first() == Some(&b'0') {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    let (rest, n) = number(input)?;
    if n == 0 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    Ok((rest, n))
}

/// Parse a 64-bit number for MODSEQ values (RFC 7162 Section 3.1.3).
///
/// RFC 9051 Section 4: `mod-sequence-value = 1*DIGIT` constrained to non-negative
/// 63-bit values (0 .. 2^63-1). Values above `i64::MAX` are rejected.
pub(super) fn number64(input: &[u8]) -> IResult<&[u8], u64> {
    let (input, digits) = digit1(input)?;
    let s = std::str::from_utf8(digits).map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    let n: u64 = s.parse().map_err(|_| {
        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
    })?;
    // RFC 9051 Section 4: mod-sequence-value is limited to 63 bits (non-negative signed).
    if n > i64::MAX as u64 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    Ok((input, n))
}

/// Parse a non-zero 64-bit number: `nz-number` variant for mod-sequence-value
/// (RFC 7162 Section 3.1.3).
///
/// Used where MODSEQ/HIGHESTMODSEQ values must be >= 1 (e.g. FETCH MODSEQ,
/// ESEARCH MODSEQ, response-code HIGHESTMODSEQ). STATUS HIGHESTMODSEQ
/// correctly allows 0 and should continue using `number64`.
pub(super) fn nz_number64(input: &[u8]) -> IResult<&[u8], u64> {
    // RFC 9051 Section 9: nz-number64 = digit-nz *DIGIT
    // digit-nz = %x31-39 ; 1-9 — leading '0' is never valid.
    if input.first() == Some(&b'0') {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    let (rest, n) = number64(input)?;
    if n == 0 {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Verify,
        )));
    }
    Ok((rest, n))
}

/// Lenient counterpart of [`number`], for places where an out-of-range value
/// should be skipped rather than fail the whole parse
/// (Postel's law — RFC 1122 Section 1.2.2).
///
/// The full digit run is always consumed. The value is `Some(n)` when it fits
/// in `u32` and `None` when it exceeds `u32::MAX`. The only failure is input
/// that does not start with a digit.
pub(super) fn number_tolerant(input: &[u8]) -> IResult<&[u8], Option<u32>> {
    let (rest, digits) = digit1(input)?;
    let parsed = match std::str::from_utf8(digits) {
        Ok(text) => text.parse::<u32>().ok(),
        Err(_) => None,
    };
    Ok((rest, parsed))
}

/// Lenient counterpart of [`number64`], for places where an out-of-range
/// value should be skipped rather than fail the whole parse
/// (Postel's law — RFC 1122 Section 1.2.2, RFC 9051 Section 4).
///
/// The full digit run is always consumed. The value is `Some(n)` when it is at
/// most `i64::MAX` and `None` otherwise. The only failure is input that does
/// not start with a digit.
pub(super) fn number64_tolerant(input: &[u8]) -> IResult<&[u8], Option<u64>> {
    let (rest, digits) = digit1(input)?;
    let parsed = match std::str::from_utf8(digits).map(str::parse::<u64>) {
        // Keep only values inside the 63-bit non-negative range.
        Ok(Ok(value)) if value <= i64::MAX as u64 => Some(value),
        _ => None,
    };
    Ok((rest, parsed))
}

/// Parse an `nstring` and decode it to text
/// (RFC 3501 Section 9 / RFC 9051 Section 9).
///
/// `NIL` becomes `None`. Otherwise the bytes are decoded as UTF-8 lossily, so
/// invalid sequences are replaced rather than causing a failure.
pub(super) fn nstring_utf8(input: &[u8]) -> IResult<&[u8], Option<String>> {
    nstring(input).map(|(rest, maybe_bytes)| {
        let text = maybe_bytes.map(|bytes| String::from_utf8_lossy(&bytes).into_owned());
        (rest, text)
    })
}

/// Parse a string and return as String (lossy UTF-8) (RFC 3501 Section 9 / RFC 9051 Section 9).
pub(super) fn string_utf8(input: &[u8]) -> IResult<&[u8], String> {
    let (input, val) = string(input)?;
    Ok((input, String::from_utf8_lossy(&val).into_owned()))
}

/// Parse an astring and return as String (lossy UTF-8) (RFC 3501 Section 9 / RFC 9051 Section 9).
pub(super) fn astring_utf8(input: &[u8]) -> IResult<&[u8], String> {
    let (input, val) = astring(input)?;
    Ok((input, String::from_utf8_lossy(&val).into_owned()))
}