daaki_imap/types/validated.rs
1//! Validated newtypes for IMAP protocol strings.
2//!
3//! These types enforce syntactic validity at construction time so that
4//! downstream code (the encoder, the connection layer) can rely on
5//! well-formedness without re-validating.
6//!
7//! - [`SequenceSet`] — RFC 3501 Section 9 / RFC 9051 Section 9
8//! - [`ImapAtom`] — RFC 3501 Section 9 / RFC 9051 Section 9
9//! - [`MailboxName`] — RFC 3501 Section 5.1 / RFC 9051 Section 5.1
10//! - [`ObjectId`] — RFC 8474 Section 4
11
12use std::fmt;
13
14pub use daaki_message::ValidationError;
15
16// ---------------------------------------------------------------------------
17// SequenceSet — RFC 3501 Section 9
18// ---------------------------------------------------------------------------
19
20/// A validated IMAP sequence-set string.
21///
22/// RFC 3501 Section 9 formal syntax:
23/// ```text
24/// sequence-set = (seq-number / seq-range) *("," sequence-set)
25/// seq-number = nz-number / "*"
26/// seq-range = seq-number ":" seq-number
27/// nz-number = digit-nz *DIGIT ; non-zero unsigned 32-bit integer
28/// ```
29///
30/// RFC 5182 Section 5 extends the grammar with `"$"` as a standalone
31/// element referencing saved search results.
32///
33/// Construction validates the full ABNF and rejects empty, malformed,
34/// or overflowing values.
35#[derive(Debug, Clone, PartialEq, Eq, Hash)]
36#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
37#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
38pub struct SequenceSet(String);
39
40impl SequenceSet {
41 /// Consumes the wrapper and returns the inner string.
42 pub fn into_inner(self) -> String {
43 self.0
44 }
45
46 /// Create a general sequence set (allows `*` and `$`).
47 ///
48 /// Validates the input against RFC 3501 Section 9 `sequence-set` ABNF,
49 /// extended by RFC 5182 Section 5 for `$`.
50 pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
51 let s = s.into();
52 if !is_valid_sequence_set(&s) {
53 return Err(ValidationError::new(format!(
54 "invalid sequence set per RFC 3501 Section 9: {s:?}"
55 )));
56 }
57 Ok(Self(s))
58 }
59
60 /// Create a "known" sequence set that rejects `*` and `$`.
61 ///
62 /// Used for QRESYNC `known-uids` and `seq-match-data` where the
63 /// wildcard `*` is explicitly disallowed (RFC 7162 Section 3.2.5.2)
64 /// and `$` (RFC 5182 search result reference) is not meaningful.
65 pub fn new_known(s: impl Into<String>) -> Result<Self, ValidationError> {
66 let s = s.into();
67 // RFC 7162 Section 3.2.5.2: "*" is not allowed in known-uids/sequence-set.
68 if s.contains('*') {
69 return Err(ValidationError::new(
70 "\"*\" is not allowed in QRESYNC known-uids/sequence-set \
71 (RFC 7162 Section 3.2.5.2)",
72 ));
73 }
74 // RFC 5182 search result reference is not valid in QRESYNC context.
75 if s.contains('$') {
76 return Err(ValidationError::new(
77 "\"$\" (search result reference) is not allowed in QRESYNC \
78 known-uids/sequence-set (RFC 7162 Section 7)",
79 ));
80 }
81 // Validate full sequence-set ABNF (since * and $ are already excluded,
82 // this validates nz-number ranges only).
83 if !is_valid_sequence_set(&s) {
84 return Err(ValidationError::new(
85 "QRESYNC known-uids/sequence-set is not a valid sequence-set \
86 (RFC 7162 Section 7: known-uids = sequence-set)",
87 ));
88 }
89 Ok(Self(s))
90 }
91
92 /// Return the inner string.
93 pub fn as_str(&self) -> &str {
94 &self.0
95 }
96}
97
98impl AsRef<str> for SequenceSet {
99 fn as_ref(&self) -> &str {
100 &self.0
101 }
102}
103
104impl fmt::Display for SequenceSet {
105 /// Formats the sequence set as its wire representation
106 /// (RFC 3501 Section 9).
107 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
108 f.write_str(&self.0)
109 }
110}
111
112impl TryFrom<String> for SequenceSet {
113 type Error = ValidationError;
114
115 /// Convert from `String`, validating per RFC 3501 Section 9.
116 fn try_from(s: String) -> Result<Self, Self::Error> {
117 Self::new(s)
118 }
119}
120
121impl TryFrom<&str> for SequenceSet {
122 type Error = ValidationError;
123
124 /// Convert from `&str`, validating per RFC 3501 Section 9.
125 fn try_from(s: &str) -> Result<Self, Self::Error> {
126 Self::new(s)
127 }
128}
129
130impl From<SequenceSet> for String {
131 fn from(s: SequenceSet) -> Self {
132 s.into_inner()
133 }
134}
135
/// Checks `s` against the RFC 3501 Section 9 `sequence-set` ABNF:
///
/// ```text
/// sequence-set = (seq-number / seq-range) *("," sequence-set)
/// seq-number = nz-number / "*"
/// seq-range = seq-number ":" seq-number
/// nz-number = digit-nz *DIGIT
/// ```
///
/// RFC 5182 Section 5 extends the grammar: `sequence-set =/ seq-last-command`,
/// where `seq-last-command = "$"`.
///
/// Returns `true` only when `s` is non-empty and every comma-separated
/// element is `$`, a single seq-number, or exactly two seq-numbers joined
/// by one `:`.
pub(crate) fn is_valid_sequence_set(s: &str) -> bool {
    /// `seq-number`: either `*` or an `nz-number` that fits in a `u32`.
    fn is_seq_number(token: &str) -> bool {
        if token == "*" {
            return true;
        }
        let mut digits = token.bytes();
        // First byte must be a non-zero digit; this also rejects the empty token.
        let leading_ok = matches!(digits.next(), Some(b'1'..=b'9'));
        leading_ok
            && digits.all(|d| d.is_ascii_digit())
            // RFC 3501 Section 9: nz-number is a u32 (0 < n < 4,294,967,296).
            && token.parse::<u32>().is_ok()
    }

    /// One comma-separated element of the set.
    fn is_element(part: &str) -> bool {
        // "$" references saved search results (RFC 5182) and is only
        // accepted as a whole element.
        if part == "$" {
            return true;
        }
        match part.split_once(':') {
            // A second ':' ends up inside `hi`, which then fails the digit check.
            Some((lo, hi)) => is_seq_number(lo) && is_seq_number(hi),
            None => is_seq_number(part),
        }
    }

    !s.is_empty() && s.split(',').all(is_element)
}
172
173// ---------------------------------------------------------------------------
174// ImapAtom — RFC 3501 Section 9
175// ---------------------------------------------------------------------------
176
177/// A validated IMAP atom string.
178///
179/// RFC 3501 Section 9 / RFC 9051 Section 9:
180/// ```text
181/// atom = 1*ATOM-CHAR
182/// ATOM-CHAR = <any CHAR except atom-specials>
183/// atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards
184/// / quoted-specials / resp-specials
185/// list-wildcards = "%" / "*"
186/// quoted-specials = DQUOTE / "\"
187/// resp-specials = "]"
188/// ```
189///
190/// Used for flag keywords (`flag-keyword = atom`, RFC 3501 Section 9)
191/// and other protocol atoms.
192#[derive(Debug, Clone, PartialEq, Eq, Hash)]
193#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
194#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
195pub struct ImapAtom(String);
196
197impl ImapAtom {
198 /// Consumes the wrapper and returns the inner string.
199 pub fn into_inner(self) -> String {
200 self.0
201 }
202
203 /// Create a validated atom.
204 ///
205 /// Returns an error if `s` is empty or contains any byte that is not
206 /// an ATOM-CHAR per RFC 3501 Section 9.
207 pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
208 let s = s.into();
209 validate_atom_bytes(s.as_bytes(), "atom")?;
210 Ok(Self(s))
211 }
212
213 /// Return the inner string.
214 pub fn as_str(&self) -> &str {
215 &self.0
216 }
217}
218
219impl AsRef<str> for ImapAtom {
220 fn as_ref(&self) -> &str {
221 &self.0
222 }
223}
224
225impl fmt::Display for ImapAtom {
226 /// Formats the atom as its wire representation (RFC 3501 Section 9).
227 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
228 f.write_str(&self.0)
229 }
230}
231
232impl TryFrom<String> for ImapAtom {
233 type Error = ValidationError;
234
235 /// Convert from `String`, validating per RFC 3501 Section 9.
236 fn try_from(s: String) -> Result<Self, Self::Error> {
237 Self::new(s)
238 }
239}
240
241impl TryFrom<&str> for ImapAtom {
242 type Error = ValidationError;
243
244 /// Convert from `&str`, validating per RFC 3501 Section 9.
245 fn try_from(s: &str) -> Result<Self, Self::Error> {
246 Self::new(s)
247 }
248}
249
250impl From<ImapAtom> for String {
251 fn from(a: ImapAtom) -> Self {
252 a.into_inner()
253 }
254}
255
256/// Core atom validation logic shared by [`ImapAtom::new`] and the encoder's
257/// `validate_atom` helper.
258///
259/// RFC 3501 Section 9: `atom = 1*ATOM-CHAR`,
260/// `ATOM-CHAR = <any CHAR except atom-specials>`,
261/// `atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / quoted-specials / resp-specials`,
262/// `list-wildcards = "%" / "*"`, `quoted-specials = DQUOTE / "\"`,
263/// `resp-specials = "]"`.
264pub(crate) fn validate_atom_bytes(bytes: &[u8], context: &str) -> Result<(), ValidationError> {
265 if bytes.is_empty() {
266 return Err(ValidationError::new(format!(
267 "{context} must be at least one character \
268 (RFC 3501 Section 9: atom = 1*ATOM-CHAR)"
269 )));
270 }
271 for &b in bytes {
272 let is_atom_special = matches!(
273 b,
274 b'(' | b')' | b'{' | b' ' | b'%' | b'*' | b'"' | b'\\' | b']'
275 );
276 let is_ctl = b < 0x20 || b == 0x7F;
277 let is_outside_char = b == 0 || b > 0x7F;
278 if is_atom_special || is_ctl || is_outside_char {
279 return Err(ValidationError::new(format!(
280 "{context} contains invalid byte 0x{b:02X} — must be an atom \
281 (RFC 3501 Section 9: ATOM-CHAR excludes atom-specials, CTL, non-ASCII)"
282 )));
283 }
284 }
285 Ok(())
286}
287
288// ---------------------------------------------------------------------------
289// MailboxName — RFC 3501 Section 5.1
290// ---------------------------------------------------------------------------
291
292/// A mailbox name string with minimal validation.
293///
294/// RFC 3501 Section 5.1 / RFC 9051 Section 5.1: IMAP mailbox names can
295/// contain almost any character (transmitted as quoted strings or literals).
296/// The primary purpose of this type is documentation and injection-safe
297/// command construction. IMAP strings forbid NUL (RFC 3501 Section 9 /
298/// RFC 9051 Section 9), and mailbox names must not contain CRLF because
299/// commands are line-delimited (RFC 3501 Section 2.2 / RFC 9051 Section 2.2).
300///
301/// The empty string `""` is allowed as a special case for server-level
302/// METADATA queries (RFC 5464 Section 4.2) and LIST reference names
303/// (RFC 3501 Section 6.3.8).
304///
305/// Validation:
306/// - No NUL, CR, or LF bytes
307///
308/// # Parse-don't-validate discipline
309///
310/// `MailboxName` has exactly two construction paths: [`MailboxName::new`]
311/// (public, validating) and `from_decoded` (codec-private). There is no
312/// `From<String>` or `From<&str>` — smuggling unvalidated data through
313/// the type is a compile error:
314///
315/// ```compile_fail
316/// use daaki_imap::MailboxName;
317/// // There is no From<String>. This must fail to compile.
318/// let _: MailboxName = "Inbox".to_string().into();
319/// ```
320///
321/// ```compile_fail
322/// use daaki_imap::MailboxName;
323/// // There is no From<&str>. This must fail to compile.
324/// let _: MailboxName = "Inbox".into();
325/// ```
326#[derive(Debug, Clone, PartialEq, Eq, Hash)]
327pub struct MailboxName(String);
328
329impl MailboxName {
330 /// Create a validated mailbox name.
331 ///
332 /// Rejects strings containing NUL (forbidden in IMAP strings by
333 /// RFC 3501 Section 9 / RFC 9051 Section 9) or CR/LF (CRLF injection
334 /// prevention per RFC 3501 Section 2.2 / RFC 9051 Section 2.2).
335 /// The empty string is allowed for server-level METADATA queries
336 /// (RFC 5464 Section 4.2).
337 pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
338 let s = s.into();
339 if s.bytes().any(|b| matches!(b, b'\0' | b'\r' | b'\n')) {
340 return Err(ValidationError::new(
341 "mailbox name must not contain NUL, CR, or LF — IMAP strings \
342 forbid NUL (RFC 3501 Section 9 / RFC 9051 Section 9) and \
343 commands are CRLF-delimited (RFC 3501 Section 2.2 / \
344 RFC 9051 Section 2.2)",
345 ));
346 }
347 Ok(Self(s))
348 }
349
350 /// Return the inner string.
351 pub fn as_str(&self) -> &str {
352 &self.0
353 }
354
355 /// Construct from already-decoded bytes produced by the codec's
356 /// decoder. This constructor skips validation — callers must
357 /// guarantee the input has already been MUTF-7 decoded (or passed
358 /// through in UTF-8 mode per RFC 6855).
359 ///
360 /// This is the only non-validating constructor. Every other
361 /// constructor path validates via `new`.
362 ///
363 /// # Visibility
364 ///
365 /// Intended for use by `crate::codec` only. Rust's `pub(in path)`
366 /// requires the path to be an ancestor of the declaring module, so
367 /// `pub(in crate::codec)` cannot compile here in `crate::types`.
368 /// `pub(crate)` is the narrowest scope available.
369 pub(crate) fn from_decoded(s: String) -> Self {
370 Self(s)
371 }
372}
373
374impl Default for MailboxName {
375 /// Returns an empty mailbox name.
376 ///
377 /// The empty string is valid per RFC 5464 Section 4.2 (server-level
378 /// METADATA) and RFC 3501 Section 6.3.8 (LIST reference name).
379 fn default() -> Self {
380 Self(String::new())
381 }
382}
383
384impl AsRef<str> for MailboxName {
385 fn as_ref(&self) -> &str {
386 &self.0
387 }
388}
389
390impl fmt::Display for MailboxName {
391 /// Formats the mailbox name (RFC 3501 Section 5.1).
392 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
393 f.write_str(&self.0)
394 }
395}
396
397impl TryFrom<String> for MailboxName {
398 type Error = ValidationError;
399
400 /// Convert from `String`, validating per RFC 3501 Section 5.1.
401 fn try_from(s: String) -> Result<Self, Self::Error> {
402 Self::new(s)
403 }
404}
405
406// ---------------------------------------------------------------------------
407// ObjectId — RFC 8474 Section 4
408// ---------------------------------------------------------------------------
409
410/// An IMAP object identifier (MAILBOXID, EMAILID, THREADID).
411///
412/// RFC 8474 Section 4:
413/// ```text
414/// objectid = 1*255(ALPHA / DIGIT / "_" / "-")
415/// ```
416///
417/// Used for `MAILBOXID`, `EMAILID`, and `THREADID` values.
418#[derive(Debug, Clone, PartialEq, Eq, Hash)]
419#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
420#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
421pub struct ObjectId(String);
422
423impl ObjectId {
424 /// Consumes the wrapper and returns the inner string.
425 pub fn into_inner(self) -> String {
426 self.0
427 }
428
429 /// Create a validated object identifier.
430 ///
431 /// RFC 8474 Section 4: 1–255 characters, each alphanumeric, dash, or
432 /// underscore.
433 pub fn new(s: impl Into<String>) -> Result<Self, ValidationError> {
434 let s = s.into();
435 if s.is_empty() {
436 return Err(ValidationError::new(
437 "object identifier must not be empty (RFC 8474 Section 4)",
438 ));
439 }
440 if s.len() > 255 {
441 return Err(ValidationError::new(format!(
442 "object identifier exceeds 255 characters ({} bytes) \
443 (RFC 8474 Section 4: objectid = 1*255(...))",
444 s.len()
445 )));
446 }
447 for &b in s.as_bytes() {
448 let valid = b.is_ascii_alphanumeric() || b == b'-' || b == b'_';
449 if !valid {
450 return Err(ValidationError::new(format!(
451 "object identifier contains invalid byte 0x{b:02X} — \
452 only ALPHA / DIGIT / \"_\" / \"-\" are allowed \
453 (RFC 8474 Section 4)"
454 )));
455 }
456 }
457 Ok(Self(s))
458 }
459
460 /// Return the inner string.
461 pub fn as_str(&self) -> &str {
462 &self.0
463 }
464}
465
466impl AsRef<str> for ObjectId {
467 fn as_ref(&self) -> &str {
468 &self.0
469 }
470}
471
472impl fmt::Display for ObjectId {
473 /// Formats the object identifier (RFC 8474 Section 4).
474 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
475 f.write_str(&self.0)
476 }
477}
478
479impl TryFrom<String> for ObjectId {
480 type Error = ValidationError;
481
482 /// Convert from `String`, validating per RFC 8474 Section 4.
483 fn try_from(s: String) -> Result<Self, Self::Error> {
484 Self::new(s)
485 }
486}
487
488impl TryFrom<&str> for ObjectId {
489 type Error = ValidationError;
490
491 /// Convert from `&str`, validating per RFC 8474 Section 4.
492 fn try_from(s: &str) -> Result<Self, Self::Error> {
493 Self::new(s)
494 }
495}
496
497impl From<ObjectId> for String {
498 fn from(o: ObjectId) -> Self {
499 o.into_inner()
500 }
501}
502
503// ---------------------------------------------------------------------------
504// ParsedUidSet — RFC 7162 Section 3.2.6 defensive filtering
505// ---------------------------------------------------------------------------
506
507use crate::types::UidRange;
508
509/// A parsed, normalized set of UID intervals for efficient containment checks.
510///
511/// Constructed from a [`SequenceSet`] by parsing its string representation into
512/// sorted, merged, non-overlapping inclusive `(start, end)` intervals.
513///
514/// Used by `FetchVanishedConsumer` to defensively filter `VANISHED (EARLIER)`
515/// UIDs that fall outside the requested set (RFC 7162 Section 3.2.6).
516///
517/// Returns `None` from [`ParsedUidSet::new`] when the set contains `$`
518/// (RFC 5182 search result reference), which cannot be resolved client-side.
519/// `*` is mapped to `u32::MAX` as a conservative upper bound — safe because `*`
520/// means "the highest UID in the mailbox" (RFC 3501 Section 6.4.8), and all
521/// reportable UIDs are <= that value.
522pub(crate) struct ParsedUidSet(Vec<(u32, u32)>);
523
524impl ParsedUidSet {
525 /// Parse a [`SequenceSet`] into sorted, merged, non-overlapping intervals.
526 ///
527 /// Returns `None` if the set contains `$` (RFC 5182 search result
528 /// reference), which cannot be resolved client-side — the caller should
529 /// skip filtering entirely in that case.
530 ///
531 /// `*` is mapped to `u32::MAX` per RFC 3501 Section 6.4.8.
532 pub(crate) fn new(set: &SequenceSet) -> Option<Self> {
533 let s = set.as_str();
534 let mut intervals: Vec<(u32, u32)> = Vec::new();
535
536 for part in s.split(',') {
537 // RFC 5182 Section 2: "$" references saved search results —
538 // unresolvable client-side.
539 if part.contains('$') {
540 return None;
541 }
542
543 if let Some((left, right)) = part.split_once(':') {
544 // seq-range: two seq-numbers separated by ":"
545 let start = Self::parse_seq_number(left)?;
546 let end = Self::parse_seq_number(right)?;
547 // Normalize: RFC 3501 Section 9 allows reversed ranges (e.g. "10:5").
548 let (lo, hi) = if start <= end {
549 (start, end)
550 } else {
551 (end, start)
552 };
553 intervals.push((lo, hi));
554 } else {
555 // Single seq-number
556 let n = Self::parse_seq_number(part)?;
557 intervals.push((n, n));
558 }
559 }
560
561 // Sort by start, then merge overlapping/adjacent intervals.
562 intervals.sort_unstable_by_key(|&(start, _)| start);
563 let mut merged: Vec<(u32, u32)> = Vec::with_capacity(intervals.len());
564 for (lo, hi) in intervals {
565 if let Some(last) = merged.last_mut() {
566 // Adjacent: last.end + 1 == lo (with saturating add to avoid overflow
567 // when last.end == u32::MAX).
568 if lo <= last.1 || lo == last.1.saturating_add(1) {
569 last.1 = last.1.max(hi);
570 continue;
571 }
572 }
573 merged.push((lo, hi));
574 }
575
576 Some(Self(merged))
577 }
578
579 /// Parse a single seq-number: `"*"` → `u32::MAX`, otherwise a non-zero u32.
580 fn parse_seq_number(s: &str) -> Option<u32> {
581 if s == "*" {
582 Some(u32::MAX)
583 } else {
584 s.parse::<u32>().ok()
585 }
586 }
587
588 /// Intersect a list of server-reported `VANISHED (EARLIER)` UID ranges
589 /// against this parsed set, keeping only UIDs that fall within the
590 /// requested set.
591 ///
592 /// Returns `(filtered_ranges, dropped_count)` where `dropped_count` is
593 /// the number of individual UIDs that were removed by filtering.
594 ///
595 /// Uses a two-pointer sweep: for each vanished interval, walk the
596 /// `ParsedUidSet` intervals to find overlapping regions.
597 ///
598 /// RFC 7162 Section 3.2.6: the server SHOULD limit `VANISHED (EARLIER)`
599 /// to the requested UID set, but non-conformant servers may not.
600 pub(crate) fn intersect_uid_ranges(&self, ranges: &[UidRange]) -> (Vec<UidRange>, usize) {
601 let mut result: Vec<UidRange> = Vec::new();
602 let mut total_input_uids: u64 = 0;
603 let mut total_output_uids: u64 = 0;
604
605 for range in ranges {
606 let v_start = range.start;
607 let v_end = range.end.unwrap_or(range.start);
608 // Defensive: normalize reversed ranges from buggy servers.
609 let (v_lo, v_hi) = if v_start <= v_end {
610 (v_start, v_end)
611 } else {
612 (v_end, v_start)
613 };
614 total_input_uids += u64::from(v_hi) - u64::from(v_lo) + 1;
615
616 // Two-pointer sweep against sorted, non-overlapping set intervals.
617 for &(s_lo, s_hi) in &self.0 {
618 // No overlap possible if the set interval is entirely after
619 // the vanished interval.
620 if s_lo > v_hi {
621 break;
622 }
623 // No overlap if the set interval is entirely before.
624 if s_hi < v_lo {
625 continue;
626 }
627 // Compute overlap.
628 let overlap_start = v_lo.max(s_lo);
629 let overlap_end = v_hi.min(s_hi);
630 // overlap_start <= overlap_end is guaranteed by the checks above.
631 total_output_uids += u64::from(overlap_end) - u64::from(overlap_start) + 1;
632 if overlap_start == overlap_end {
633 result.push(UidRange::single(overlap_start));
634 } else {
635 result.push(UidRange::range(overlap_start, overlap_end));
636 }
637 }
638 }
639
640 // Dropped = total input UIDs minus total output UIDs.
641 // In practice this count fits in usize — a VANISHED response cannot
642 // reference more than u32::MAX UIDs — but we use saturating conversion
643 // for 32-bit target safety.
644 let dropped_u64 = total_input_uids - total_output_uids;
645 let dropped = usize::try_from(dropped_u64).unwrap_or(usize::MAX);
646 (result, dropped)
647 }
648}
649
650// ---------------------------------------------------------------------------
651// Tests
652// ---------------------------------------------------------------------------
653
// Unit tests for this module are kept in the sibling file
// `validated_tests.rs` and are compiled only for test builds.
#[cfg(test)]
#[path = "validated_tests.rs"]
mod tests;