Skip to main content

daaki_imap/types/
validated.rs

1//! Validated newtypes for IMAP protocol strings.
2//!
3//! These types enforce syntactic validity at construction time so that
4//! downstream code (the encoder, the connection layer) can rely on
5//! well-formedness without re-validating.
6//!
7//! - [`SequenceSet`] — RFC 3501 Section 9 / RFC 9051 Section 9
8//! - [`ImapAtom`] — RFC 3501 Section 9 / RFC 9051 Section 9
9//! - [`MailboxName`] — RFC 3501 Section 5.1 / RFC 9051 Section 5.1
10//! - [`ObjectId`] — RFC 8474 Section 4
11
12use std::fmt;
13
14pub use daaki_message::ValidationError;
15
16// ---------------------------------------------------------------------------
17// SequenceSet — RFC 3501 Section 9
18// ---------------------------------------------------------------------------
19
20/// A validated IMAP sequence-set string.
21///
22/// RFC 3501 Section 9 formal syntax:
23/// ```text
24/// sequence-set    = (seq-number / seq-range) *("," sequence-set)
25/// seq-number      = nz-number / "*"
26/// seq-range       = seq-number ":" seq-number
27/// nz-number       = digit-nz *DIGIT      ; non-zero unsigned 32-bit integer
28/// ```
29///
30/// RFC 5182 Section 5 extends the grammar with `"$"` as a standalone
31/// element referencing saved search results.
32///
33/// Construction validates the full ABNF and rejects empty, malformed,
34/// or overflowing values.
35#[derive(Debug, Clone, PartialEq, Eq, Hash)]
36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
37#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
38pub struct SequenceSet(String);
39
40impl SequenceSet {
41    /// Consumes the wrapper and returns the inner string.
42    pub fn into_inner(self) -> String {
43        self.0
44    }
45
46    /// Create a general sequence set (allows `*` and `$`).
47    ///
48    /// Validates the input against RFC 3501 Section 9 `sequence-set` ABNF,
49    /// extended by RFC 5182 Section 5 for `$`.
50    pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
51        let s = s.into();
52        if !is_valid_sequence_set(&s) {
53            return Err(ValidationError::new(format!(
54                "invalid sequence set per RFC 3501 Section 9: {s:?}"
55            )));
56        }
57        Ok(Self(s))
58    }
59
60    /// Create a "known" sequence set that rejects `*` and `$`.
61    ///
62    /// Used for QRESYNC `known-uids` and `seq-match-data` where the
63    /// wildcard `*` is explicitly disallowed (RFC 7162 Section 3.2.5.2)
64    /// and `$` (RFC 5182 search result reference) is not meaningful.
65    pub fn new_known(s: impl Into<String>) -> Result<Self, ValidationError> {
66        let s = s.into();
67        // RFC 7162 Section 3.2.5.2: "*" is not allowed in known-uids/sequence-set.
68        if s.contains('*') {
69            return Err(ValidationError::new(
70                "\"*\" is not allowed in QRESYNC known-uids/sequence-set \
71                 (RFC 7162 Section 3.2.5.2)",
72            ));
73        }
74        // RFC 5182 search result reference is not valid in QRESYNC context.
75        if s.contains('$') {
76            return Err(ValidationError::new(
77                "\"$\" (search result reference) is not allowed in QRESYNC \
78                 known-uids/sequence-set (RFC 7162 Section 7)",
79            ));
80        }
81        // Validate full sequence-set ABNF (since * and $ are already excluded,
82        // this validates nz-number ranges only).
83        if !is_valid_sequence_set(&s) {
84            return Err(ValidationError::new(
85                "QRESYNC known-uids/sequence-set is not a valid sequence-set \
86                 (RFC 7162 Section 7: known-uids = sequence-set)",
87            ));
88        }
89        Ok(Self(s))
90    }
91
92    /// Return the inner string.
93    pub fn as_str(&self) -> &str {
94        &self.0
95    }
96}
97
98impl AsRef<str> for SequenceSet {
99    fn as_ref(&self) -> &str {
100        &self.0
101    }
102}
103
104impl fmt::Display for SequenceSet {
105    /// Formats the sequence set as its wire representation
106    /// (RFC 3501 Section 9).
107    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
108        f.write_str(&self.0)
109    }
110}
111
112impl TryFrom<String> for SequenceSet {
113    type Error = ValidationError;
114
115    /// Convert from `String`, validating per RFC 3501 Section 9.
116    fn try_from(s: String) -> Result<Self, Self::Error> {
117        Self::new(s)
118    }
119}
120
121impl TryFrom<&str> for SequenceSet {
122    type Error = ValidationError;
123
124    /// Convert from `&str`, validating per RFC 3501 Section 9.
125    fn try_from(s: &str) -> Result<Self, Self::Error> {
126        Self::new(s)
127    }
128}
129
130impl From<SequenceSet> for String {
131    fn from(s: SequenceSet) -> Self {
132        s.into_inner()
133    }
134}
135
/// Reports whether `s` is a syntactically valid `sequence-set`.
///
/// RFC 3501 Section 9:
///
/// ```text
/// sequence-set = (seq-number / seq-range) *("," sequence-set)
/// seq-number   = nz-number / "*"
/// seq-range    = seq-number ":" seq-number
/// nz-number    = digit-nz *DIGIT
/// ```
///
/// RFC 5182 Section 5 adds `sequence-set =/ seq-last-command` with
/// `seq-last-command = "$"`; this function accepts `"$"` as any
/// comma-separated element.
///
/// Returns `false` for the empty string, for empty elements (leading,
/// trailing, or doubled commas), for elements with more than one `:`, and
/// for numbers that are zero, zero-prefixed, non-numeric, or too large
/// for a `u32`.
pub(crate) fn is_valid_sequence_set(s: &str) -> bool {
    /// `seq-number = nz-number / "*"`.
    fn is_seq_number(token: &str) -> bool {
        if token == "*" {
            return true;
        }
        let digits_only = !token.is_empty() && token.bytes().all(|b| b.is_ascii_digit());
        // RFC 3501 Section 9: nz-number has no leading zero and fits in a
        // u32 (0 < n < 4,294,967,296).
        digits_only && !token.starts_with('0') && token.parse::<u32>().is_ok()
    }

    /// One comma-separated element: `"$"`, a seq-number, or a seq-range.
    fn is_element(element: &str) -> bool {
        // RFC 5182 Section 2: "$" references saved search results.
        if element == "$" {
            return true;
        }
        match element.split_once(':') {
            // A second ':' would end up inside `hi`, which then fails the
            // seq-number check, so at most one ':' is ever accepted.
            Some((lo, hi)) => is_seq_number(lo) && is_seq_number(hi),
            None => is_seq_number(element),
        }
    }

    !s.is_empty() && s.split(',').all(is_element)
}
172
173// ---------------------------------------------------------------------------
174// ImapAtom — RFC 3501 Section 9
175// ---------------------------------------------------------------------------
176
177/// A validated IMAP atom string.
178///
179/// RFC 3501 Section 9 / RFC 9051 Section 9:
180/// ```text
181/// atom            = 1*ATOM-CHAR
182/// ATOM-CHAR       = <any CHAR except atom-specials>
183/// atom-specials   = "(" / ")" / "{" / SP / CTL / list-wildcards
184///                 / quoted-specials / resp-specials
185/// list-wildcards  = "%" / "*"
186/// quoted-specials = DQUOTE / "\"
187/// resp-specials   = "]"
188/// ```
189///
190/// Used for flag keywords (`flag-keyword = atom`, RFC 3501 Section 9)
191/// and other protocol atoms.
192#[derive(Debug, Clone, PartialEq, Eq, Hash)]
193#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
194#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
195pub struct ImapAtom(String);
196
197impl ImapAtom {
198    /// Consumes the wrapper and returns the inner string.
199    pub fn into_inner(self) -> String {
200        self.0
201    }
202
203    /// Create a validated atom.
204    ///
205    /// Returns an error if `s` is empty or contains any byte that is not
206    /// an ATOM-CHAR per RFC 3501 Section 9.
207    pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
208        let s = s.into();
209        validate_atom_bytes(s.as_bytes(), "atom")?;
210        Ok(Self(s))
211    }
212
213    /// Return the inner string.
214    pub fn as_str(&self) -> &str {
215        &self.0
216    }
217}
218
219impl AsRef<str> for ImapAtom {
220    fn as_ref(&self) -> &str {
221        &self.0
222    }
223}
224
225impl fmt::Display for ImapAtom {
226    /// Formats the atom as its wire representation (RFC 3501 Section 9).
227    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
228        f.write_str(&self.0)
229    }
230}
231
232impl TryFrom<String> for ImapAtom {
233    type Error = ValidationError;
234
235    /// Convert from `String`, validating per RFC 3501 Section 9.
236    fn try_from(s: String) -> Result<Self, Self::Error> {
237        Self::new(s)
238    }
239}
240
241impl TryFrom<&str> for ImapAtom {
242    type Error = ValidationError;
243
244    /// Convert from `&str`, validating per RFC 3501 Section 9.
245    fn try_from(s: &str) -> Result<Self, Self::Error> {
246        Self::new(s)
247    }
248}
249
250impl From<ImapAtom> for String {
251    fn from(a: ImapAtom) -> Self {
252        a.into_inner()
253    }
254}
255
256/// Core atom validation logic shared by [`ImapAtom::new`] and the encoder's
257/// `validate_atom` helper.
258///
259/// RFC 3501 Section 9: `atom = 1*ATOM-CHAR`,
260/// `ATOM-CHAR = <any CHAR except atom-specials>`,
261/// `atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / quoted-specials / resp-specials`,
262/// `list-wildcards = "%" / "*"`, `quoted-specials = DQUOTE / "\"`,
263/// `resp-specials = "]"`.
264pub(crate) fn validate_atom_bytes(bytes: &[u8], context: &str) -> Result<(), ValidationError> {
265    if bytes.is_empty() {
266        return Err(ValidationError::new(format!(
267            "{context} must be at least one character \
268             (RFC 3501 Section 9: atom = 1*ATOM-CHAR)"
269        )));
270    }
271    for &b in bytes {
272        let is_atom_special = matches!(
273            b,
274            b'(' | b')' | b'{' | b' ' | b'%' | b'*' | b'"' | b'\\' | b']'
275        );
276        let is_ctl = b < 0x20 || b == 0x7F;
277        let is_outside_char = b == 0 || b > 0x7F;
278        if is_atom_special || is_ctl || is_outside_char {
279            return Err(ValidationError::new(format!(
280                "{context} contains invalid byte 0x{b:02X} — must be an atom \
281                 (RFC 3501 Section 9: ATOM-CHAR excludes atom-specials, CTL, non-ASCII)"
282            )));
283        }
284    }
285    Ok(())
286}
287
288// ---------------------------------------------------------------------------
289// MailboxName — RFC 3501 Section 5.1
290// ---------------------------------------------------------------------------
291
292/// A mailbox name string with minimal validation.
293///
294/// RFC 3501 Section 5.1 / RFC 9051 Section 5.1: IMAP mailbox names can
295/// contain almost any character (transmitted as quoted strings or literals).
296/// The primary purpose of this type is documentation and injection-safe
297/// command construction. IMAP strings forbid NUL (RFC 3501 Section 9 /
298/// RFC 9051 Section 9), and mailbox names must not contain CRLF because
299/// commands are line-delimited (RFC 3501 Section 2.2 / RFC 9051 Section 2.2).
300///
301/// The empty string `""` is allowed as a special case for server-level
302/// METADATA queries (RFC 5464 Section 4.2) and LIST reference names
303/// (RFC 3501 Section 6.3.8).
304///
305/// Validation:
306/// - No NUL, CR, or LF bytes
307///
308/// # Parse-don't-validate discipline
309///
310/// `MailboxName` has exactly two construction paths: [`MailboxName::new`]
311/// (public, validating) and `from_decoded` (codec-private). There is no
312/// `From<String>` or `From<&str>` — smuggling unvalidated data through
313/// the type is a compile error:
314///
315/// ```compile_fail
316/// use daaki_imap::MailboxName;
317/// // There is no From<String>. This must fail to compile.
318/// let _: MailboxName = "Inbox".to_string().into();
319/// ```
320///
321/// ```compile_fail
322/// use daaki_imap::MailboxName;
323/// // There is no From<&str>. This must fail to compile.
324/// let _: MailboxName = "Inbox".into();
325/// ```
326#[derive(Debug, Clone, PartialEq, Eq, Hash)]
327pub struct MailboxName(String);
328
329impl MailboxName {
330    /// Create a validated mailbox name.
331    ///
332    /// Rejects strings containing NUL (forbidden in IMAP strings by
333    /// RFC 3501 Section 9 / RFC 9051 Section 9) or CR/LF (CRLF injection
334    /// prevention per RFC 3501 Section 2.2 / RFC 9051 Section 2.2).
335    /// The empty string is allowed for server-level METADATA queries
336    /// (RFC 5464 Section 4.2).
337    pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
338        let s = s.into();
339        if s.bytes().any(|b| matches!(b, b'\0' | b'\r' | b'\n')) {
340            return Err(ValidationError::new(
341                "mailbox name must not contain NUL, CR, or LF — IMAP strings \
342                 forbid NUL (RFC 3501 Section 9 / RFC 9051 Section 9) and \
343                 commands are CRLF-delimited (RFC 3501 Section 2.2 / \
344                 RFC 9051 Section 2.2)",
345            ));
346        }
347        Ok(Self(s))
348    }
349
350    /// Return the inner string.
351    pub fn as_str(&self) -> &str {
352        &self.0
353    }
354
355    /// Construct from already-decoded bytes produced by the codec's
356    /// decoder. This constructor skips validation — callers must
357    /// guarantee the input has already been MUTF-7 decoded (or passed
358    /// through in UTF-8 mode per RFC 6855).
359    ///
360    /// This is the only non-validating constructor. Every other
361    /// constructor path validates via `new`.
362    ///
363    /// # Visibility
364    ///
365    /// Intended for use by `crate::codec` only. Rust's `pub(in path)`
366    /// requires the path to be an ancestor of the declaring module, so
367    /// `pub(in crate::codec)` cannot compile here in `crate::types`.
368    /// `pub(crate)` is the narrowest scope available.
369    pub(crate) fn from_decoded(s: String) -> Self {
370        Self(s)
371    }
372}
373
374impl Default for MailboxName {
375    /// Returns an empty mailbox name.
376    ///
377    /// The empty string is valid per RFC 5464 Section 4.2 (server-level
378    /// METADATA) and RFC 3501 Section 6.3.8 (LIST reference name).
379    fn default() -> Self {
380        Self(String::new())
381    }
382}
383
384impl AsRef<str> for MailboxName {
385    fn as_ref(&self) -> &str {
386        &self.0
387    }
388}
389
390impl fmt::Display for MailboxName {
391    /// Formats the mailbox name (RFC 3501 Section 5.1).
392    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
393        f.write_str(&self.0)
394    }
395}
396
397impl TryFrom<String> for MailboxName {
398    type Error = ValidationError;
399
400    /// Convert from `String`, validating per RFC 3501 Section 5.1.
401    fn try_from(s: String) -> Result<Self, Self::Error> {
402        Self::new(s)
403    }
404}
405
406// ---------------------------------------------------------------------------
407// ObjectId — RFC 8474 Section 4
408// ---------------------------------------------------------------------------
409
410/// An IMAP object identifier (MAILBOXID, EMAILID, THREADID).
411///
412/// RFC 8474 Section 4:
413/// ```text
414/// objectid = 1*255(ALPHA / DIGIT / "_" / "-")
415/// ```
416///
417/// Used for `MAILBOXID`, `EMAILID`, and `THREADID` values.
418#[derive(Debug, Clone, PartialEq, Eq, Hash)]
419#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
420#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
421pub struct ObjectId(String);
422
423impl ObjectId {
424    /// Consumes the wrapper and returns the inner string.
425    pub fn into_inner(self) -> String {
426        self.0
427    }
428
429    /// Create a validated object identifier.
430    ///
431    /// RFC 8474 Section 4: 1–255 characters, each alphanumeric, dash, or
432    /// underscore.
433    pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
434        let s = s.into();
435        if s.is_empty() {
436            return Err(ValidationError::new(
437                "object identifier must not be empty (RFC 8474 Section 4)",
438            ));
439        }
440        if s.len() > 255 {
441            return Err(ValidationError::new(format!(
442                "object identifier exceeds 255 characters ({} bytes) \
443                 (RFC 8474 Section 4: objectid = 1*255(...))",
444                s.len()
445            )));
446        }
447        for &b in s.as_bytes() {
448            let valid = b.is_ascii_alphanumeric() || b == b'-' || b == b'_';
449            if !valid {
450                return Err(ValidationError::new(format!(
451                    "object identifier contains invalid byte 0x{b:02X} — \
452                     only ALPHA / DIGIT / \"_\" / \"-\" are allowed \
453                     (RFC 8474 Section 4)"
454                )));
455            }
456        }
457        Ok(Self(s))
458    }
459
460    /// Return the inner string.
461    pub fn as_str(&self) -> &str {
462        &self.0
463    }
464}
465
466impl AsRef<str> for ObjectId {
467    fn as_ref(&self) -> &str {
468        &self.0
469    }
470}
471
472impl fmt::Display for ObjectId {
473    /// Formats the object identifier (RFC 8474 Section 4).
474    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
475        f.write_str(&self.0)
476    }
477}
478
479impl TryFrom<String> for ObjectId {
480    type Error = ValidationError;
481
482    /// Convert from `String`, validating per RFC 8474 Section 4.
483    fn try_from(s: String) -> Result<Self, Self::Error> {
484        Self::new(s)
485    }
486}
487
488impl TryFrom<&str> for ObjectId {
489    type Error = ValidationError;
490
491    /// Convert from `&str`, validating per RFC 8474 Section 4.
492    fn try_from(s: &str) -> Result<Self, Self::Error> {
493        Self::new(s)
494    }
495}
496
497impl From<ObjectId> for String {
498    fn from(o: ObjectId) -> Self {
499        o.into_inner()
500    }
501}
502
503// ---------------------------------------------------------------------------
504// ParsedUidSet — RFC 7162 Section 3.2.6 defensive filtering
505// ---------------------------------------------------------------------------
506
507use crate::types::UidRange;
508
509/// A parsed, normalized set of UID intervals for efficient containment checks.
510///
511/// Constructed from a [`SequenceSet`] by parsing its string representation into
512/// sorted, merged, non-overlapping inclusive `(start, end)` intervals.
513///
514/// Used by `FetchVanishedConsumer` to defensively filter `VANISHED (EARLIER)`
515/// UIDs that fall outside the requested set (RFC 7162 Section 3.2.6).
516///
517/// Returns `None` from [`ParsedUidSet::new`] when the set contains `$`
518/// (RFC 5182 search result reference), which cannot be resolved client-side.
519/// `*` is mapped to `u32::MAX` as a conservative upper bound — safe because `*`
520/// means "the highest UID in the mailbox" (RFC 3501 Section 6.4.8), and all
521/// reportable UIDs are <= that value.
522pub(crate) struct ParsedUidSet(Vec<(u32, u32)>);
523
524impl ParsedUidSet {
525    /// Parse a [`SequenceSet`] into sorted, merged, non-overlapping intervals.
526    ///
527    /// Returns `None` if the set contains `$` (RFC 5182 search result
528    /// reference), which cannot be resolved client-side — the caller should
529    /// skip filtering entirely in that case.
530    ///
531    /// `*` is mapped to `u32::MAX` per RFC 3501 Section 6.4.8.
532    pub(crate) fn new(set: &SequenceSet) -> Option<Self> {
533        let s = set.as_str();
534        let mut intervals: Vec<(u32, u32)> = Vec::new();
535
536        for part in s.split(',') {
537            // RFC 5182 Section 2: "$" references saved search results —
538            // unresolvable client-side.
539            if part.contains('$') {
540                return None;
541            }
542
543            if let Some((left, right)) = part.split_once(':') {
544                // seq-range: two seq-numbers separated by ":"
545                let start = Self::parse_seq_number(left)?;
546                let end = Self::parse_seq_number(right)?;
547                // Normalize: RFC 3501 Section 9 allows reversed ranges (e.g. "10:5").
548                let (lo, hi) = if start <= end {
549                    (start, end)
550                } else {
551                    (end, start)
552                };
553                intervals.push((lo, hi));
554            } else {
555                // Single seq-number
556                let n = Self::parse_seq_number(part)?;
557                intervals.push((n, n));
558            }
559        }
560
561        // Sort by start, then merge overlapping/adjacent intervals.
562        intervals.sort_unstable_by_key(|&(start, _)| start);
563        let mut merged: Vec<(u32, u32)> = Vec::with_capacity(intervals.len());
564        for (lo, hi) in intervals {
565            if let Some(last) = merged.last_mut() {
566                // Adjacent: last.end + 1 == lo (with saturating add to avoid overflow
567                // when last.end == u32::MAX).
568                if lo <= last.1 || lo == last.1.saturating_add(1) {
569                    last.1 = last.1.max(hi);
570                    continue;
571                }
572            }
573            merged.push((lo, hi));
574        }
575
576        Some(Self(merged))
577    }
578
579    /// Parse a single seq-number: `"*"` → `u32::MAX`, otherwise a non-zero u32.
580    fn parse_seq_number(s: &str) -> Option<u32> {
581        if s == "*" {
582            Some(u32::MAX)
583        } else {
584            s.parse::<u32>().ok()
585        }
586    }
587
588    /// Intersect a list of server-reported `VANISHED (EARLIER)` UID ranges
589    /// against this parsed set, keeping only UIDs that fall within the
590    /// requested set.
591    ///
592    /// Returns `(filtered_ranges, dropped_count)` where `dropped_count` is
593    /// the number of individual UIDs that were removed by filtering.
594    ///
595    /// Uses a two-pointer sweep: for each vanished interval, walk the
596    /// `ParsedUidSet` intervals to find overlapping regions.
597    ///
598    /// RFC 7162 Section 3.2.6: the server SHOULD limit `VANISHED (EARLIER)`
599    /// to the requested UID set, but non-conformant servers may not.
600    pub(crate) fn intersect_uid_ranges(&self, ranges: &[UidRange]) -> (Vec<UidRange>, usize) {
601        let mut result: Vec<UidRange> = Vec::new();
602        let mut total_input_uids: u64 = 0;
603        let mut total_output_uids: u64 = 0;
604
605        for range in ranges {
606            let v_start = range.start;
607            let v_end = range.end.unwrap_or(range.start);
608            // Defensive: normalize reversed ranges from buggy servers.
609            let (v_lo, v_hi) = if v_start <= v_end {
610                (v_start, v_end)
611            } else {
612                (v_end, v_start)
613            };
614            total_input_uids += u64::from(v_hi) - u64::from(v_lo) + 1;
615
616            // Two-pointer sweep against sorted, non-overlapping set intervals.
617            for &(s_lo, s_hi) in &self.0 {
618                // No overlap possible if the set interval is entirely after
619                // the vanished interval.
620                if s_lo > v_hi {
621                    break;
622                }
623                // No overlap if the set interval is entirely before.
624                if s_hi < v_lo {
625                    continue;
626                }
627                // Compute overlap.
628                let overlap_start = v_lo.max(s_lo);
629                let overlap_end = v_hi.min(s_hi);
630                // overlap_start <= overlap_end is guaranteed by the checks above.
631                total_output_uids += u64::from(overlap_end) - u64::from(overlap_start) + 1;
632                if overlap_start == overlap_end {
633                    result.push(UidRange::single(overlap_start));
634                } else {
635                    result.push(UidRange::range(overlap_start, overlap_end));
636                }
637            }
638        }
639
640        // Dropped = total input UIDs minus total output UIDs.
641        // In practice this count fits in usize — a VANISHED response cannot
642        // reference more than u32::MAX UIDs — but we use saturating conversion
643        // for 32-bit target safety.
644        let dropped_u64 = total_input_uids - total_output_uids;
645        let dropped = usize::try_from(dropped_u64).unwrap_or(usize::MAX);
646        (result, dropped)
647    }
648}
649
650// ---------------------------------------------------------------------------
651// Tests
652// ---------------------------------------------------------------------------
653
654#[cfg(test)]
655#[path = "validated_tests.rs"]
656mod tests;