Skip to main content

wasm_smtp/
protocol.rs

1//! SMTP wire-format helpers.
2//!
3//! This module is the home for all logic that touches SMTP bytes directly:
4//!
5//! - [`parse_reply_line`] interprets a single CRLF-stripped reply line.
6//! - [`Reply`] aggregates one or more lines into a complete reply.
7//! - [`format_command`] / [`format_command_arg`] produce CRLF-terminated
8//!   command bytes.
9//! - [`dot_stuff_and_terminate`] produces a complete DATA payload from a
10//!   user-supplied body, including the `\r\n.\r\n` terminator.
//! - [`base64_encode`] / [`base64_decode`] form a small, dependency-free
//!   base64 codec: the encoder is used for `AUTH LOGIN`, the decoder for
//!   SCRAM server messages.
13//! - The `validate_*` functions reject caller input that would inject CRLF
14//!   sequences or otherwise violate SMTP grammar before any byte is sent.
15//!
16//! None of these helpers perform I/O; they operate on borrowed buffers and
17//! return owned bytes or errors.
18
19use crate::error::{InvalidInputError, ProtocolError};
20
/// Maximum length of a single reply line, excluding CRLF.
///
/// RFC 5321 §4.5.3.1.5 sets a 512-octet limit for reply lines including
/// CRLF. We accept up to 998 octets of text plus CRLF (the body line limit
/// from §4.5.3.1.6) to be lenient toward real-world server software that
/// occasionally exceeds the strict reply-line limit.
pub const MAX_REPLY_LINE_LEN: usize = 998;

/// Maximum number of lines accepted in a single multi-line reply.
///
/// SMTP does not specify a hard cap, but a reasonable defensive limit
/// prevents an unbounded server from causing unbounded allocation.
pub const MAX_REPLY_LINES: usize = 128;

/// Maximum length of an envelope address (RFC 5321 §4.5.3.1.3).
///
/// The standard's `Path` limit is 256 octets, including the angle
/// brackets that frame the address on the wire. With brackets
/// stripped, the validated address may be at most 254 octets.
pub const MAX_ADDRESS_LEN: usize = 254;

/// Maximum length of an address local-part (RFC 5321 §4.5.3.1.1).
pub const MAX_LOCAL_PART_LEN: usize = 64;

/// Maximum length of an address domain (RFC 5321 §4.5.3.1.2).
///
/// Note: [`validate_address`] checks the whole address against
/// [`MAX_ADDRESS_LEN`] (254 octets) before it looks at the domain, so a
/// domain can never actually reach this limit there. The constant records
/// the RFC's per-field bound.
pub const MAX_DOMAIN_LEN: usize = 255;
47
48// -----------------------------------------------------------------------------
49// Reply parsing
50// -----------------------------------------------------------------------------
51
/// One parsed line of an SMTP reply.
///
/// The `text` field borrows from the buffer that was handed to
/// [`parse_reply_line`]; no bytes are copied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplyLine<'a> {
    /// The three-digit reply code.
    pub code: u16,
    /// `true` if the code was followed by a space, or by nothing at all
    /// (last line of a reply); `false` if it was followed by `-`
    /// (continuation line).
    pub is_last: bool,
    /// The text portion after the separator. May be empty.
    pub text: &'a [u8],
}
63
64/// Parse a single CRLF-stripped reply line.
65///
66/// The input must not contain the terminating CRLF.
67pub fn parse_reply_line(line: &[u8]) -> Result<ReplyLine<'_>, ProtocolError> {
68    if line.len() < 3 {
69        return Err(malformed(line));
70    }
71    let d0 = ascii_digit_value(line[0]).ok_or_else(|| malformed(line))?;
72    let d1 = ascii_digit_value(line[1]).ok_or_else(|| malformed(line))?;
73    let d2 = ascii_digit_value(line[2]).ok_or_else(|| malformed(line))?;
74    let code = u16::from(d0) * 100 + u16::from(d1) * 10 + u16::from(d2);
75
76    if line.len() == 3 {
77        // RFC 5321 requires a separator, but a code-only line with no text
78        // and no separator is unambiguous: treat it as a last line.
79        return Ok(ReplyLine {
80            code,
81            is_last: true,
82            text: &[],
83        });
84    }
85    let (is_last, text) = match line[3] {
86        b' ' => (true, &line[4..]),
87        b'-' => (false, &line[4..]),
88        _ => return Err(malformed(line)),
89    };
90    Ok(ReplyLine {
91        code,
92        is_last,
93        text,
94    })
95}
96
/// Map an ASCII digit byte (`b'0'..=b'9'`) to its numeric value.
///
/// Any other byte yields `None`.
fn ascii_digit_value(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        _ => None,
    }
}
104
105fn malformed(line: &[u8]) -> ProtocolError {
106    ProtocolError::Malformed(String::from_utf8_lossy(line).into_owned())
107}
108
/// An enhanced status code from RFC 3463, parsed out of an SMTP reply
/// when the server has advertised the `ENHANCEDSTATUSCODES` extension
/// (RFC 2034).
///
/// Enhanced codes are formatted `class.subject.detail`, for example
/// `5.7.1` or `4.7.0`. The basic three-digit reply code (e.g. `550`) and
/// the enhanced code share the leading digit (the "class"); the remaining
/// two fields refine the diagnosis beyond what the basic code carries.
///
/// Callers can use the structured fields to make routing decisions
/// ("if the code is 5.1.x the address is permanently bad; if 4.x.x retry
/// later").
///
/// Per RFC 3463 §2:
/// - `class` is one of 2, 4, or 5 (success / persistent transient /
///   permanent).
/// - `subject` and `detail` are one to three decimal digits, i.e. 0–999.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EnhancedStatus {
    /// Leading class digit (2, 4, or 5).
    pub class: u8,
    /// Second field: the broad subject category (e.g. `1` = address,
    /// `7` = security/policy).
    pub subject: u16,
    /// Third field: the specific detail within the subject.
    pub detail: u16,
}

impl EnhancedStatus {
    /// Format as `class.subject.detail`, the dotted-decimal wire form used
    /// by RFC 3463, with no zero padding. Identical to the `Display` output.
    #[must_use]
    pub fn to_dotted(&self) -> String {
        // Single source of truth for the textual form lives in `Display`.
        self.to_string()
    }
}

impl core::fmt::Display for EnhancedStatus {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{}.{}.{}", self.class, self.subject, self.detail)
    }
}

/// Try to parse an [`EnhancedStatus`] from the start of a reply line's
/// text portion.
///
/// The expected format is `x.y.z` where `x` is `2`, `4` or `5` and `y`,
/// `z` are one to three ASCII digits each (RFC 3463 §2). The code may be
/// followed by end-of-string or by any non-digit byte. Anything else —
/// a missing dot, an empty field, a field longer than three digits, or a
/// different class digit — yields `None`, and the caller must then leave
/// the text untouched.
///
/// Returns `(status, prefix_len)` where `prefix_len` is the number of
/// bytes consumed from `text`. If the code is immediately followed by a
/// single space or tab, that one byte is included in `prefix_len` so the
/// remaining message starts cleanly; any other following byte is not
/// consumed. `prefix_len` always falls on a UTF-8 character boundary
/// because every consumed byte is ASCII.
fn parse_enhanced_status_prefix(text: &str) -> Option<(EnhancedStatus, usize)> {
    /// RFC 3463 §2 limits the subject and detail fields to three digits.
    const MAX_FIELD_DIGITS: usize = 3;

    /// Read a run of 1..=3 leading ASCII digits; returns `(value, digits)`.
    /// A run of four or more digits is rejected rather than truncated.
    fn numeric_field(bytes: &[u8]) -> Option<(u16, usize)> {
        let digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
        if digits == 0 || digits > MAX_FIELD_DIGITS {
            return None;
        }
        let value = bytes[..digits]
            .iter()
            .fold(0u16, |acc, &d| acc * 10 + u16::from(d - b'0'));
        Some((value, digits))
    }

    let bytes = text.as_bytes();

    // Class digit and its dot.
    let [class_byte @ (b'2' | b'4' | b'5'), b'.', after_class @ ..] = bytes else {
        return None;
    };

    // Subject, which must be terminated by a dot.
    let (subject, subject_len) = numeric_field(after_class)?;
    let [b'.', after_subject @ ..] = &after_class[subject_len..] else {
        return None;
    };

    // Detail, terminated by end-of-string or any non-digit byte.
    let (detail, detail_len) = numeric_field(after_subject)?;

    // "x." + subject + "." + detail
    let mut prefix_len = 2 + subject_len + 1 + detail_len;
    // Swallow exactly one separating space or tab, if present.
    if matches!(bytes.get(prefix_len), Some(b' ' | b'\t')) {
        prefix_len += 1;
    }

    Some((
        EnhancedStatus {
            class: *class_byte - b'0',
            subject,
            detail,
        },
        prefix_len,
    ))
}
220
221/// A complete SMTP reply, possibly assembled from multiple continuation
222/// lines.
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct Reply {
225    /// The three-digit reply code, shared by every line of the reply.
226    pub code: u16,
227    /// One entry per line, in the order received. Each entry is the line's
228    /// text portion (after the code and separator) decoded as UTF-8 with
229    /// invalid sequences replaced by `U+FFFD`. The text retains any
230    /// enhanced status code prefix; use [`Self::message_text`] to obtain
231    /// the same text with the prefix stripped, or [`Self::enhanced`] to
232    /// obtain the parsed code itself.
233    pub lines: Vec<String>,
234    /// Parsed enhanced status code (RFC 3463), set only when the server
235    /// has advertised `ENHANCEDSTATUSCODES` for this session. The code
236    /// is taken from the first reply line; multi-line replies that
237    /// disagree on the code are flagged at parse time, so this is well
238    /// defined when present.
239    enhanced: Option<EnhancedStatus>,
240}
241
242impl Reply {
243    /// Construct a reply with the given code and lines, with no enhanced
244    /// status code attached. The client adds an enhanced code via the
245    /// internal `attach_enhanced_status` setter when the session has
246    /// `ENHANCEDSTATUSCODES` enabled.
247    #[must_use]
248    pub fn new(code: u16, lines: Vec<String>) -> Self {
249        Self {
250            code,
251            lines,
252            enhanced: None,
253        }
254    }
255
256    /// The leading digit of the reply code, useful for class-based checks.
257    pub fn class(&self) -> u8 {
258        u8::try_from(self.code / 100).unwrap_or(0)
259    }
260
261    /// Reply text concatenated with `\n`. Suitable for diagnostics.
262    /// If an enhanced status code prefix is present, it is preserved in
263    /// the output; use [`Self::message_text`] for a presentation that
264    /// hides it.
265    ///
266    /// # Caveat for log handlers
267    ///
268    /// The returned `String` may contain `\n` (used internally to
269    /// separate multi-line replies). It does **not** contain `\r` —
270    /// CRLF is stripped by the reply parser before storage — but
271    /// applications that forward this text to line-oriented loggers
272    /// (`syslog`, journald, structured JSON, etc.) should still
273    /// escape or render newlines explicitly to avoid log injection
274    /// where one logical reply renders as multiple log records. The
275    /// same caveat applies to anything else that consumes the
276    /// `Display` output of [`crate::ProtocolError`] or
277    /// [`crate::AuthError`], since those types embed reply text.
278    pub fn joined_text(&self) -> String {
279        self.lines.join("\n")
280    }
281
282    /// Reply text with any enhanced status code prefix stripped from
283    /// each line. Suitable for human-facing error messages where the
284    /// code is shown separately. Lines that have no enhanced prefix
285    /// are returned unchanged.
286    pub fn message_text(&self) -> String {
287        if self.enhanced.is_none() {
288            return self.joined_text();
289        }
290        let stripped: Vec<&str> = self
291            .lines
292            .iter()
293            .map(|line| match parse_enhanced_status_prefix(line) {
294                Some((_, prefix_len)) => &line[prefix_len..],
295                None => line.as_str(),
296            })
297            .collect();
298        stripped.join("\n")
299    }
300
301    /// Parsed enhanced status code, if the server has provided one and
302    /// the session has it enabled.
303    #[must_use]
304    pub fn enhanced(&self) -> Option<EnhancedStatus> {
305        self.enhanced
306    }
307
308    /// Set the enhanced status code on this reply. Used by the client
309    /// after the EHLO capability set has been confirmed to include
310    /// `ENHANCEDSTATUSCODES`.
311    pub(crate) fn attach_enhanced_status(&mut self, status: EnhancedStatus) {
312        self.enhanced = Some(status);
313    }
314
315    /// Iterate over the trimmed text of each line. Useful for parsing EHLO
316    /// capabilities, where the first line contains the greeting and the
317    /// remaining lines each name a single capability (e.g. `AUTH LOGIN`,
318    /// `PIPELINING`, `8BITMIME`).
319    pub fn iter_lines(&self) -> impl Iterator<Item = &str> {
320        self.lines.iter().map(String::as_str)
321    }
322
323    /// Parse an enhanced status code from the first line's text, if
324    /// present. Used by the client to populate `self.enhanced` only when
325    /// the session has `ENHANCEDSTATUSCODES` enabled.
326    #[must_use]
327    pub fn try_parse_enhanced(&self) -> Option<EnhancedStatus> {
328        self.lines
329            .first()
330            .and_then(|line| parse_enhanced_status_prefix(line).map(|(s, _)| s))
331    }
332}
333
334// -----------------------------------------------------------------------------
335// Command formatting
336// -----------------------------------------------------------------------------
337
/// Render an argument-less command as wire bytes: the verb followed by CRLF.
///
/// Example: `format_command("QUIT")` yields `b"QUIT\r\n"`.
pub fn format_command(verb: &str) -> Vec<u8> {
    format!("{verb}\r\n").into_bytes()
}
347
/// Render a one-argument command as wire bytes: verb, a single space, the
/// argument, then CRLF.
///
/// Example: `format_command_arg("EHLO", "client.example.com")` yields
/// `b"EHLO client.example.com\r\n"`.
///
/// The argument is copied verbatim — no escaping, no validation. Callers
/// must validate it beforehand.
pub fn format_command_arg(verb: &str, arg: &str) -> Vec<u8> {
    format!("{verb} {arg}\r\n").into_bytes()
}
363
/// Render `MAIL FROM:<addr>\r\n` as wire bytes.
///
/// `addr` is inserted verbatim between the angle brackets; the caller must
/// have validated it first.
pub fn format_mail_from(addr: &str) -> Vec<u8> {
    [b"MAIL FROM:<".as_slice(), addr.as_bytes(), b">\r\n".as_slice()].concat()
}
372
/// Render `RCPT TO:<addr>\r\n` as wire bytes.
///
/// `addr` is inserted verbatim between the angle brackets; the caller must
/// have validated it first.
pub fn format_rcpt_to(addr: &str) -> Vec<u8> {
    [b"RCPT TO:<".as_slice(), addr.as_bytes(), b">\r\n".as_slice()].concat()
}
381
382// -----------------------------------------------------------------------------
383// DATA payload
384// -----------------------------------------------------------------------------
385
/// Turn a message body into the complete byte stream sent during `DATA`.
///
/// The result is the body with:
///
/// 1. an extra `.` inserted before every line that begins with `.`
///    (RFC 5321 §4.5.2 dot-stuffing) — a "line" here starts at the very
///    first byte and after each `\r\n` pair;
/// 2. a `\r\n` appended if the stuffed body does not already end in one;
/// 3. the end-of-data marker `.\r\n` appended last.
///
/// The body is expected to use CRLF line endings. Lone `\r` or `\n` bytes
/// are passed through untouched and do not start a new line for stuffing
/// purposes; callers that need normalisation must do it beforehand.
///
/// Only `\r`, `\n` and `.` are inspected, so arbitrary 8-bit content
/// passes through unchanged.
pub fn dot_stuff_and_terminate(body: &[u8]) -> Vec<u8> {
    let mut wire = Vec::with_capacity(body.len() + 8);

    for (idx, &byte) in body.iter().enumerate() {
        // A line starts at offset 0 or right after a CRLF pair.
        let line_start =
            idx == 0 || (idx >= 2 && body[idx - 2] == b'\r' && body[idx - 1] == b'\n');
        if byte == b'.' && line_start {
            wire.push(b'.');
        }
        wire.push(byte);
    }

    let terminator: &[u8] = if wire.ends_with(b"\r\n") {
        b".\r\n"
    } else {
        b"\r\n.\r\n"
    };
    wire.extend_from_slice(terminator);
    wire
}
420
421// ---------------------------------------------------------------------------
422// Streaming dot-stuffer state machine
423// ---------------------------------------------------------------------------
424
/// Incremental RFC 5321 dot-stuffer.
///
/// Where [`dot_stuff_and_terminate`] needs the whole body at once, this
/// type is fed one chunk at a time and only ever allocates output for the
/// chunk at hand, which suits large messages and constrained runtimes.
///
/// ## Usage
///
/// ```rust
/// use wasm_smtp::protocol::DotStufferState;
///
/// let mut stuffer = DotStufferState::new();
///
/// // Process each chunk.
/// let chunk1 = b"Subject: test\r\n\r\n";
/// let out1 = stuffer.process_chunk(chunk1);
///
/// let chunk2 = b".dotted line\r\nend\r\n";
/// let out2 = stuffer.process_chunk(chunk2);
///
/// // Produce the end-of-data terminator.
/// let terminator = stuffer.finish();
///
/// // on-wire: out1 + out2 + terminator
/// assert_eq!(&out2[..2], b".."); // dot-stuffed
/// assert_eq!(terminator, b".\r\n");
/// ```
#[derive(Debug, Clone)]
pub struct DotStufferState {
    /// Whether the next byte fed in sits at the start of a line.
    at_line_start: bool,
    /// Most recent byte seen by `process_chunk`.
    prev: u8,
    /// The byte before `prev`. The pair lets `finish()` tell whether the
    /// body ended in `\r\n`.
    prev_prev: u8,
    /// Stays `true` until `process_chunk` has received at least one byte.
    empty: bool,
}

impl DotStufferState {
    /// A fresh state machine positioned at the start of the first line.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            at_line_start: true,
            prev: 0,
            prev_prev: 0,
            empty: true,
        }
    }

    /// Dot-stuff a single chunk and hand back the bytes to put on the wire.
    ///
    /// Every byte of `chunk` is copied through; an extra `.` is inserted
    /// before each `.` that sits at the start of a line. An empty chunk is
    /// a no-op that returns an empty `Vec` and leaves the state untouched.
    ///
    /// # Cross-chunk dot-stuffing
    ///
    /// Line-start tracking survives chunk boundaries: if one chunk ends in
    /// `\r\n` (or ends in `\r` with the next starting `\n`), a `.` that
    /// opens the following line is still stuffed.
    pub fn process_chunk(&mut self, chunk: &[u8]) -> Vec<u8> {
        if chunk.is_empty() {
            return Vec::new();
        }

        // Small headroom for the common case of a few stuffed dots.
        let mut stuffed = Vec::with_capacity(chunk.len() + 4);
        for &byte in chunk {
            if byte == b'.' && self.at_line_start {
                stuffed.push(b'.');
            }
            stuffed.push(byte);

            // Only a CRLF pair opens a new line.
            self.at_line_start = (self.prev, byte) == (b'\r', b'\n');
            self.prev_prev = self.prev;
            self.prev = byte;
        }
        self.empty = false;
        stuffed
    }

    /// Consume the state machine and return the bytes that close `DATA`.
    ///
    /// - `.\r\n` when the body fed so far ended with `\r\n`;
    /// - `\r\n.\r\n` otherwise, including when nothing was fed at all.
    ///
    /// This mirrors what [`dot_stuff_and_terminate`] appends.
    #[must_use]
    pub fn finish(self) -> Vec<u8> {
        let body_ended_with_crlf =
            !self.empty && (self.prev_prev, self.prev) == (b'\r', b'\n');
        if body_ended_with_crlf {
            b".\r\n".to_vec()
        } else {
            b"\r\n.\r\n".to_vec()
        }
    }
}

impl Default for DotStufferState {
    fn default() -> Self {
        Self::new()
    }
}
539
540// -----------------------------------------------------------------------------
541// Base64
542// -----------------------------------------------------------------------------
543
/// The standard base64 alphabet (RFC 4648 §4), indexed by 6-bit value.
const BASE64_ALPHABET: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode `input` as standard, `=`-padded base64 (RFC 4648).
///
/// Kept in-crate on purpose: it avoids an external base64 dependency and
/// is short enough to audit by eye.
pub fn base64_encode(input: &[u8]) -> String {
    let mut encoded = String::with_capacity(input.len().div_ceil(3) * 4);

    for group in input.chunks(3) {
        // Pack up to three bytes into the top 24 bits of a word.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        // k input bytes produce k + 1 symbols and 3 - k padding characters.
        let (symbols, padding) = match group.len() {
            3 => (4, ""),
            2 => (3, "="),
            _ => (2, "=="),
        };
        push_b64(&mut encoded, word, symbols);
        encoded.push_str(padding);
    }

    encoded
}

/// Append the first `count` base64 symbols of the 24-bit word `n` to `out`.
///
/// Symbols are taken from the most significant sextet downwards (bit
/// offsets 18, 12, 6, 0), so `count` is expected to be in `2..=4`.
fn push_b64(out: &mut String, n: u32, count: u8) {
    out.extend((0..count).map(|i| {
        let sextet = (n >> (18 - 6 * i)) & 0x3F;
        char::from(BASE64_ALPHABET[sextet as usize])
    }));
}
585
/// Standard base64 decoding (RFC 4648), padded with `=`.
///
/// The symmetric counterpart of [`base64_encode`]. Used for SCRAM
/// `server-first` and `server-final` decoding.
///
/// Accepted input:
///
/// - the empty string (decodes to an empty `Vec`);
/// - otherwise a length that is a multiple of 4, made of characters from
///   the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`), optionally
///   followed by one or two `=` padding characters at the very end.
///
/// `=` anywhere other than the final one or two positions is rejected,
/// as is a run of three or more trailing `=`. Unused low bits in the last
/// symbol before the padding are ignored rather than checked.
///
/// # Errors
///
/// Returns the static string `"invalid base64"` on any decode
/// failure. The caller is expected to wrap this in a
/// domain-appropriate error type.
pub fn base64_decode(input: &str) -> Result<Vec<u8>, &'static str> {
    const INVALID: &str = "invalid base64";

    /// Map one alphabet character to its 6-bit value. `=` is deliberately
    /// not part of the alphabet: padding is stripped before symbols are
    /// decoded, so a `=` reaching this point is misplaced.
    fn sextet(c: u8) -> Option<u32> {
        let value = match c {
            b'A'..=b'Z' => c - b'A',
            b'a'..=b'z' => c - b'a' + 26,
            b'0'..=b'9' => c - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return None,
        };
        Some(u32::from(value))
    }

    let bytes = input.as_bytes();
    if bytes.len() % 4 != 0 {
        return Err(INVALID);
    }

    // Padding may only be one or two `=` at the very end of the input.
    let pad = bytes.iter().rev().take_while(|&&b| b == b'=').count();
    if pad > 2 {
        return Err(INVALID);
    }
    let data = &bytes[..bytes.len() - pad];

    let mut out = Vec::with_capacity(bytes.len() / 4 * 3);
    // The total length is a multiple of 4 and `pad` is 0, 1 or 2, so every
    // group has 4 symbols except possibly the last, which has 3 or 2.
    for group in data.chunks(4) {
        let mut word: u32 = 0;
        for &c in group {
            word = (word << 6) | sextet(c).ok_or(INVALID)?;
        }
        // Left-align a short final group inside the 24-bit word.
        word <<= 6 * (4 - group.len());
        let [_, b0, b1, b2] = word.to_be_bytes();
        // k symbols carry k - 1 complete bytes.
        out.extend_from_slice(&[b0, b1, b2][..group.len() - 1]);
    }
    Ok(out)
}
646
647// -----------------------------------------------------------------------------
648// Input validation
649// -----------------------------------------------------------------------------
650
651/// Validate a mail address (RFC 5321 reverse-path / forward-path content).
652///
653/// The check is intentionally conservative: it rejects the characters that
654/// would either inject SMTP commands or violate the framing of `<addr>`.
655/// Validate an envelope address (used in MAIL FROM / RCPT TO) against
656/// RFC 5321 grammar and the length limits in §4.5.3.1.
657///
658/// The check is conservative — it does not parse RFC 5321 grammar in
659/// detail, but it forbids any byte that would corrupt the command
660/// framing, and rejects values that exceed the standard's per-field
661/// length limits.
662///
663/// In particular:
664///
665/// - non-empty;
666/// - ASCII only — UTF-8 addresses require the `smtputf8` feature
667///   (which exposes a separate UTF-8-permissive validator);
668/// - no `\r`, `\n`, or `\0`;
669/// - no `<`, `>`, or space (which would corrupt the angle-bracket framing);
670/// - the whole address (local-part + `@` + domain) must be no longer
671///   than 254 octets — RFC 5321 §4.5.3.1.3 specifies 256 for the
672///   `Path` token including angle brackets, leaving 254 for the
673///   bracket-stripped address;
674/// - if an `@` is present, the local-part is no longer than 64 octets
675///   and the domain is no longer than 255 octets (§4.5.3.1.1 /
676///   §4.5.3.1.2). These limits are advisory: many real-world relays
677///   accept longer values, but rejecting at the client boundary
678///   prevents a misformed input from generating a wire `MAIL FROM`
679///   line that exceeds the SMTP line-length limit (§4.5.3.1.5).
680pub fn validate_address(addr: &str) -> Result<(), InvalidInputError> {
681    if addr.is_empty() {
682        return Err(InvalidInputError::new("mail address must not be empty"));
683    }
684    if !addr.is_ascii() {
685        return Err(InvalidInputError::new(
686            "mail address must be ASCII (SMTPUTF8 is not supported)",
687        ));
688    }
689    if addr.len() > MAX_ADDRESS_LEN {
690        return Err(InvalidInputError::new(
691            "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
692        ));
693    }
694    if let Some(at_pos) = addr.rfind('@') {
695        let (local, domain) = addr.split_at(at_pos);
696        // domain still has the leading '@' — strip it.
697        let domain = &domain[1..];
698        if local.len() > MAX_LOCAL_PART_LEN {
699            return Err(InvalidInputError::new(
700                "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
701            ));
702        }
703        if domain.len() > MAX_DOMAIN_LEN {
704            return Err(InvalidInputError::new(
705                "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
706            ));
707        }
708    }
709    for b in addr.bytes() {
710        match b {
711            b'\r' | b'\n' => {
712                return Err(InvalidInputError::new(
713                    "mail address must not contain CR or LF",
714                ));
715            }
716            0 => {
717                return Err(InvalidInputError::new(
718                    "mail address must not contain a NUL byte",
719                ));
720            }
721            b'<' | b'>' => {
722                return Err(InvalidInputError::new(
723                    "mail address must not contain '<' or '>'",
724                ));
725            }
726            b' ' | b'\t' => {
727                return Err(InvalidInputError::new(
728                    "mail address must not contain whitespace",
729                ));
730            }
731            _ => {}
732        }
733    }
734    Ok(())
735}
736
737/// Validate the domain argument supplied to `EHLO`.
738///
739/// Accepts any non-empty sequence of printable ASCII (0x21..=0x7E). Address
740/// literals (e.g. `[192.0.2.1]`) and dotted FQDNs both pass. The check is
741/// intentionally lenient: its job is to prevent CRLF injection, not to
742/// enforce DNS syntax.
743pub fn validate_ehlo_domain(domain: &str) -> Result<(), InvalidInputError> {
744    if domain.is_empty() {
745        return Err(InvalidInputError::new("EHLO domain must not be empty"));
746    }
747    if !domain.is_ascii() {
748        return Err(InvalidInputError::new("EHLO domain must be ASCII"));
749    }
750    if domain.bytes().any(|b| !(0x21..=0x7E).contains(&b)) {
751        return Err(InvalidInputError::new(
752            "EHLO domain must contain only printable ASCII characters",
753        ));
754    }
755    Ok(())
756}
757
/// Validate the username supplied to `AUTH LOGIN`.
///
/// As of v0.5.0 this is a thin alias for [`validate_plain_username`]:
/// the two SASL mechanisms (PLAIN and LOGIN) accept the same shape
/// of credential string and the same constraints apply. NUL bytes
/// are rejected because they would corrupt the SASL framing on the
/// post-base64 server side.
///
/// The function is retained for source compatibility with v0.4.x
/// callers, but new code should use [`validate_plain_username`]
/// directly. A future major release may remove this alias.
///
/// # Errors
///
/// Returns exactly what [`validate_plain_username`] returns for `user`.
pub fn validate_login_username(user: &str) -> Result<(), InvalidInputError> {
    validate_plain_username(user)
}
772
/// Validate the password supplied to `AUTH LOGIN`.
///
/// As of v0.5.0 this is a thin alias for [`validate_plain_password`].
/// See [`validate_login_username`] for the rationale.
///
/// # Errors
///
/// Returns exactly what [`validate_plain_password`] returns for `pass`.
pub fn validate_login_password(pass: &str) -> Result<(), InvalidInputError> {
    validate_plain_password(pass)
}
780
781// -----------------------------------------------------------------------------
782// EHLO capability inspection
783// -----------------------------------------------------------------------------
784
/// Report whether any EHLO capability line is an `AUTH` line that lists
/// `mechanism`.
///
/// Each line is split on ASCII whitespace. A line matches when its first
/// token equals `AUTH` and one of the remaining tokens equals `mechanism`;
/// both comparisons ignore ASCII case. Blank lines are skipped.
///
/// `capability_lines` is the slice of lines that follows the greeting in
/// an `EHLO` reply: each line is one extension (e.g. `"AUTH LOGIN PLAIN"`,
/// `"PIPELINING"`, `"8BITMIME"`).
pub fn ehlo_advertises_auth<S: AsRef<str>>(capability_lines: &[S], mechanism: &str) -> bool {
    capability_lines.iter().any(|line| {
        let mut tokens = line.as_ref().split_ascii_whitespace();
        let is_auth_line = tokens
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("AUTH"));
        is_auth_line && tokens.any(|offered| offered.eq_ignore_ascii_case(mechanism))
    })
}
807
/// Report whether the EHLO capability lines advertise the `STARTTLS`
/// extension (RFC 3207).
///
/// A line matches when its first whitespace-separated token equals
/// `STARTTLS`, ignoring ASCII case; anything after the keyword is ignored.
///
/// `capability_lines` is the slice of lines that follows the greeting in
/// an `EHLO` reply; each line is one extension keyword optionally
/// followed by parameters.
pub fn ehlo_advertises_starttls<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines.iter().any(|line| {
        line.as_ref()
            .split_ascii_whitespace()
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("STARTTLS"))
    })
}
824
/// Report whether the EHLO capabilities include the
/// `ENHANCEDSTATUSCODES` extension (RFC 2034).
///
/// Only the first whitespace-separated token of each capability line is
/// compared, ignoring ASCII case.
///
/// The result controls reply parsing for the session. When the keyword
/// was advertised, the client strips the `class.subject.detail` prefix
/// from each reply and surfaces it as [`EnhancedStatus`], both on the
/// [`Reply`] and on [`crate::ProtocolError::UnexpectedCode`]. When it was
/// not advertised, a similar-looking sequence in a reply (a stray
/// "5.1.1", say) stays in the message text unparsed.
pub fn ehlo_advertises_enhanced_status_codes<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines
        .iter()
        // Blank lines yield no leading token and simply drop out here.
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("ENHANCEDSTATUSCODES"))
}
845
846// -----------------------------------------------------------------------------
847// Authentication mechanisms
848// -----------------------------------------------------------------------------
849
/// Authentication mechanisms this client can name on an `AUTH` command.
///
/// The set covers the password-based `PLAIN` (RFC 4616), `LOGIN` and
/// `SCRAM-SHA-256` mechanisms, plus the token-based `XOAUTH2` and
/// `OAUTHBEARER` mechanisms. The enum is `non_exhaustive`, so adding
/// another mechanism later does not require a major version bump.
///
/// `LOGIN` is the historical mechanism kept for submission servers that
/// advertise nothing else. Between the two plaintext mechanisms `PLAIN`
/// is the better choice: it needs one round-trip rather than two and is
/// an IETF-standard SASL mechanism.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum AuthMechanism {
    /// SASL `PLAIN` (RFC 4616). The initial response is the base64
    /// encoding of `\0user\0pass`, so the exchange completes in a
    /// single round-trip.
    Plain,
    /// `LOGIN`. The username and the password travel as two separate
    /// base64 lines, each answering a `334` prompt from the server.
    Login,
    /// SASL `XOAUTH2` (Google / Microsoft OAuth 2.0 SMTP extension).
    /// The initial response is the base64 encoding of
    /// `user={user}\x01auth=Bearer {token}\x01\x01`. The "credential"
    /// handed to `login_with` is an OAuth 2.0 access token rather than
    /// a static password, which is why auto-selection by `login()`
    /// deliberately never picks this mechanism.
    XOAuth2,
    /// SASL `OAUTHBEARER` (RFC 7628), the IETF-standard OAuth 2.0 SASL
    /// mechanism. The initial response is the base64 encoding of
    /// `n,a={user},\x01auth=Bearer {token}\x01\x01` (GS2 header followed
    /// by the Bearer token). As with `XOAUTH2`, the credential is a
    /// token, so auto-selection by `login()` never picks it.
    ///
    /// Usable only with the `oauthbearer` cargo feature (default-on);
    /// the variant itself is always present.
    OAuthBearer,
    /// SASL `SCRAM-SHA-256` (RFC 5802 / RFC 7677). A challenge-response
    /// exchange: the password itself is never transmitted, and the
    /// server proves possession of the salted hash through a signature
    /// step. Auto-selection by `login()` ranks this mechanism above
    /// `PLAIN` and `LOGIN` when the server advertises it.
    ///
    /// Usable only with the `scram-sha-256` cargo feature (default-on);
    /// the variant itself is always present.
    ScramSha256,
}

impl AuthMechanism {
    /// The keyword that identifies this mechanism on the SMTP wire,
    /// i.e. the token placed after `AUTH` and matched against the
    /// server's `AUTH` capability line.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            AuthMechanism::ScramSha256 => "SCRAM-SHA-256",
            AuthMechanism::OAuthBearer => "OAUTHBEARER",
            AuthMechanism::XOAuth2 => "XOAUTH2",
            AuthMechanism::Login => "LOGIN",
            AuthMechanism::Plain => "PLAIN",
        }
    }
}

/// Displays the mechanism as its wire keyword (see [`AuthMechanism::name`]).
/// Width and alignment flags on the formatter are not applied.
impl core::fmt::Display for AuthMechanism {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let keyword: &'static str = (*self).name();
        f.write_str(keyword)
    }
}
918
919/// Pick the best mechanism advertised by the server, preferring
920/// `SCRAM-SHA-256` over `PLAIN` over `LOGIN`. Returns `None` if the
921/// server advertised none of these.
922///
923/// Use this when you want a single `login` call to do the right thing
924/// across the variety of submission servers in deployment. If you need
925/// to lock in a specific mechanism (for example, to reproduce a
926/// production failure in a test), call [`crate::client::SmtpClient::login_with`]
927/// directly.
928///
929/// `SCRAM-SHA-256` is the modern default: it does not transmit the
930/// password in plaintext and is supported by all current submission
931/// servers (Postfix + Dovecot SASL, Exchange, Stalwart). `PLAIN` is
932/// the universal fallback. `LOGIN` is retained only for very old
933/// servers.
934///
935/// Note: when the `scram-sha-256` feature is disabled, the function
936/// behaves as if SCRAM were not in the picture and falls through to
937/// the PLAIN/LOGIN preference.
938pub fn select_auth_mechanism<S: AsRef<str>>(capability_lines: &[S]) -> Option<AuthMechanism> {
939    #[cfg(feature = "scram-sha-256")]
940    if ehlo_advertises_auth(capability_lines, "SCRAM-SHA-256") {
941        return Some(AuthMechanism::ScramSha256);
942    }
943
944    if ehlo_advertises_auth(capability_lines, "PLAIN") {
945        Some(AuthMechanism::Plain)
946    } else if ehlo_advertises_auth(capability_lines, "LOGIN") {
947        Some(AuthMechanism::Login)
948    } else {
949        None
950    }
951}
952
953/// Build the SASL `PLAIN` initial response for the given credentials.
954///
955/// The result is the base64 encoding of `\0user\0pass` (RFC 4616 §2).
956/// The empty authorization identity (the part before the first NUL)
957/// means "act as the authenticated user", which is the correct default
958/// for SMTP submission.
959///
960/// The caller is responsible for the surrounding command framing; the
961/// full on-wire bytes are `b"AUTH PLAIN " + result + b"\r\n"`.
962///
963/// # Encoding
964///
965/// `user` and `pass` are encoded as their UTF-8 bytes. RFC 4616 mandates
966/// UTF-8 for both fields; this matches Rust's `String` representation.
967#[must_use]
968pub fn build_auth_plain_initial_response(user: &str, pass: &str) -> String {
969    let mut payload = Vec::with_capacity(2 + user.len() + pass.len());
970    payload.push(0u8); // empty authzid
971    payload.extend_from_slice(user.as_bytes());
972    payload.push(0u8);
973    payload.extend_from_slice(pass.as_bytes());
974    base64_encode(&payload)
975}
976
977/// Validate the username supplied to a SASL `PLAIN` `AUTH` exchange.
978///
979/// RFC 4616 forbids NUL bytes in the authcid (NUL is the field
980/// separator). Empty usernames are also refused: while RFC 4616 itself
981/// allows them, no SMTP submission server accepts an empty login, and
982/// rejecting them up-front turns a server-side failure into a
983/// programmer-visible one.
984pub fn validate_plain_username(user: &str) -> Result<(), InvalidInputError> {
985    if user.is_empty() {
986        return Err(InvalidInputError::new("AUTH username must not be empty"));
987    }
988    if user.bytes().any(|b| b == 0) {
989        return Err(InvalidInputError::new(
990            "AUTH username must not contain a NUL byte",
991        ));
992    }
993    Ok(())
994}
995
996/// Validate the password supplied to a SASL `PLAIN` `AUTH` exchange.
997///
998/// As with [`validate_plain_username`], NUL bytes are forbidden because
999/// they would corrupt the SASL framing.
1000pub fn validate_plain_password(pass: &str) -> Result<(), InvalidInputError> {
1001    if pass.is_empty() {
1002        return Err(InvalidInputError::new("AUTH password must not be empty"));
1003    }
1004    if pass.bytes().any(|b| b == 0) {
1005        return Err(InvalidInputError::new(
1006            "AUTH password must not contain a NUL byte",
1007        ));
1008    }
1009    Ok(())
1010}
1011
1012// -----------------------------------------------------------------------------
1013// XOAUTH2 (Google / Microsoft OAuth 2.0 SASL profile)
1014//
1015// The three helpers in this section are feature-gated behind
1016// `xoauth2` (default-on). Disabling the feature removes them
1017// entirely along with the corresponding `SmtpClient::login_xoauth2`
1018// method and the `XOAuth2` arm of `login_with`. The
1019// `AuthMechanism::XOAuth2` and `SmtpOp::AuthXOAuth2` enum variants
1020// remain present in either configuration; both enums are
1021// `non_exhaustive` and the variants without the feature are simply
1022// unreachable through the public API.
1023// -----------------------------------------------------------------------------
1024
/// Build the SASL `XOAUTH2` initial response.
///
/// The wire format, before base64, is:
///
/// ```text
/// user={user}\x01auth=Bearer {token}\x01\x01
/// ```
///
/// where `\x01` is the SOH (Ctrl-A) byte that separates fields. The
/// `Bearer ` prefix is fixed and case-sensitive. `user` and `token` are
/// copied into the payload verbatim, with no escaping or checking; the
/// caller must have validated them with [`validate_xoauth2_user`] and
/// [`validate_oauth2_token`] first.
///
/// The returned string is the base64 encoding of the entire payload,
/// suitable for placement after `AUTH XOAUTH2 ` on the wire. The
/// caller is responsible for the surrounding command framing.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
#[cfg(feature = "xoauth2")]
#[must_use]
pub fn build_xoauth2_initial_response(user: &str, token: &str) -> String {
    const USER_KEY: &[u8] = b"user=";
    const AUTH_KEY: &[u8] = b"auth=Bearer ";
    const SOH: u8 = 0x01;
    // One SOH after the user field plus the two that terminate the message.
    const SOH_COUNT: usize = 3;

    // Derive the fixed overhead from the literals themselves (5 + 12 + 3 =
    // 20 octets) so the reservation covers the whole payload and the final
    // push never has to grow the buffer.
    let capacity = USER_KEY.len() + AUTH_KEY.len() + SOH_COUNT + user.len() + token.len();
    let mut payload = Vec::with_capacity(capacity);
    payload.extend_from_slice(USER_KEY);
    payload.extend_from_slice(user.as_bytes());
    payload.push(SOH);
    payload.extend_from_slice(AUTH_KEY);
    payload.extend_from_slice(token.as_bytes());
    payload.push(SOH);
    payload.push(SOH);
    base64_encode(&payload)
}
1059
/// Check the user field of a SASL `XOAUTH2` exchange.
///
/// XOAUTH2 (Google / Microsoft) does not formally constrain this field.
/// To keep the SASL payload intact, the bytes that could break its
/// framing are refused: SOH (`0x01`, the field separator), NUL, CR and
/// LF. An empty user is refused as well.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
///
/// # Errors
///
/// Returns an [`InvalidInputError`] when `user` is empty or contains one
/// of the forbidden bytes; emptiness is reported first.
#[cfg(feature = "xoauth2")]
pub fn validate_xoauth2_user(user: &str) -> Result<(), InvalidInputError> {
    let breaks_framing = |byte: u8| byte == 0 || byte == b'\r' || byte == b'\n' || byte == 0x01;

    if user.is_empty() {
        Err(InvalidInputError::new("XOAUTH2 user must not be empty"))
    } else if user.bytes().any(breaks_framing) {
        Err(InvalidInputError::new(
            "XOAUTH2 user must not contain NUL, CR, LF, or SOH",
        ))
    } else {
        Ok(())
    }
}
1080
/// Check an OAuth 2.0 access token before it goes on the wire.
///
/// The token must be non-empty and every byte must be a visible ASCII
/// character, i.e. in `0x21..=0x7E`. That excludes the space (`0x20`),
/// the tab (`0x09`), every other control byte, and anything non-ASCII.
/// The SOH separator used by XOAUTH2 is a control byte, so it is
/// excluded by the same rule.
///
/// The `b64token` alphabet of RFC 6750 §2.1 lies inside this range. In
/// practice both Google and Microsoft access tokens consist of
/// `[A-Za-z0-9._~+/=-]` and pass this check trivially.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
///
/// # Errors
///
/// Returns an [`InvalidInputError`] when the token is empty or contains
/// a byte outside `0x21..=0x7E`.
#[cfg(feature = "xoauth2")]
pub fn validate_oauth2_token(token: &str) -> Result<(), InvalidInputError> {
    if token.is_empty() {
        return Err(InvalidInputError::new(
            "OAuth2 access token must not be empty",
        ));
    }
    // `is_ascii_graphic` is true exactly for U+0021 '!' through U+007E '~'.
    if token.bytes().all(|byte| byte.is_ascii_graphic()) {
        Ok(())
    } else {
        Err(InvalidInputError::new(
            "OAuth2 access token must contain only printable ASCII (no whitespace or control bytes)",
        ))
    }
}
1114
1115// -----------------------------------------------------------------------------
1116// OAUTHBEARER (RFC 7628) — feature-gated
1117// -----------------------------------------------------------------------------
1118
/// Build the base64-encoded initial response for `AUTH OAUTHBEARER` (RFC 7628).
///
/// RFC 7628 format: `n,a={user},\x01auth=Bearer {token}\x01\x01`
///
/// - `n` — GS2 header: no channel binding.
/// - `a={user}` — optional authorization identity (authzid). When `user`
///   is empty the whole `a=` attribute is omitted and the header becomes
///   `n,,`, telling the server to use the identity implied by the token.
/// - `\x01auth=Bearer {token}\x01\x01` — SASL key=value attributes.
///
/// `,` and `=` are structural characters of the GS2 header, so inside
/// the authzid they are written as `=2C` and `=3D` respectively (the
/// `saslname` encoding of RFC 5801). Pass `user` unescaped. `token` is
/// copied verbatim and is expected to have been validated by the caller.
///
/// The difference from `XOAUTH2`:
/// - `XOAUTH2` (Google proprietary): `user={email}\x01auth=Bearer {token}\x01\x01`
/// - `OAUTHBEARER` (RFC 7628): `n,a={email},\x01auth=Bearer {token}\x01\x01`
///
/// The caller is responsible for the surrounding command framing.
///
/// Available only with the `oauthbearer` cargo feature (default-on).
#[cfg(feature = "oauthbearer")]
#[must_use]
pub fn build_oauthbearer_initial_response(user: &str, token: &str) -> String {
    const AUTH_KEY: &[u8] = b"auth=Bearer ";
    const SOH: u8 = 0x01;
    // "n," (2) + "a=" (2) + "," (1) + three SOH bytes (3).
    const FRAMING_LEN: usize = 8;

    // Exact for users without `,` / `=`; escaping may grow it slightly,
    // which `Vec` absorbs on its own.
    let mut payload =
        Vec::with_capacity(FRAMING_LEN + AUTH_KEY.len() + user.len() + token.len());

    payload.extend_from_slice(b"n,");
    // An empty authzid is expressed by leaving the attribute out: a bare
    // "a=" is not a valid gs2-authzid.
    if !user.is_empty() {
        payload.extend_from_slice(b"a=");
        for byte in user.bytes() {
            match byte {
                b'=' => payload.extend_from_slice(b"=3D"),
                b',' => payload.extend_from_slice(b"=2C"),
                other => payload.push(other),
            }
        }
    }
    payload.push(b',');
    payload.push(SOH);
    payload.extend_from_slice(AUTH_KEY);
    payload.extend_from_slice(token.as_bytes());
    payload.push(SOH);
    payload.push(SOH);
    base64_encode(&payload)
}
1148
1149// -----------------------------------------------------------------------------
1150// PIPELINING (RFC 2920) — feature-gated
1151// -----------------------------------------------------------------------------
1152
1153/// Return `true` if the EHLO capability lines advertise `PIPELINING` (RFC 2920).
1154///
1155/// When pipelining is available, `send_mail` batches `MAIL FROM`, all
1156/// `RCPT TO` commands, and `DATA` into a single write, reducing the
1157/// number of network round-trips.
1158#[cfg(feature = "pipelining")]
1159#[must_use]
pub fn ehlo_advertises_pipelining(caps: &[String]) -> bool {
    // Look only at the leading keyword of each capability line, ignoring
    // ASCII case, exactly as the other `ehlo_advertises_*` helpers do.
    // Comparing the entire line would miss an advertisement that carries
    // leading or trailing whitespace (or any trailing text).
    caps.iter().any(|line| {
        line.split_ascii_whitespace()
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("PIPELINING"))
    })
}
1163
1164// -----------------------------------------------------------------------------
1165// SMTPUTF8 (RFC 6531) — feature-gated
1166// -----------------------------------------------------------------------------
1167//
1168// SMTPUTF8 lets a session carry mail addresses outside the ASCII
1169// repertoire — e.g. `送信者@例え.jp`. The crate gates the related
1170// helpers behind the `smtputf8` cargo feature: callers who only ever
1171// submit ASCII addresses pay no code-size cost for the UTF-8 validator,
1172// the `MAIL FROM ... SMTPUTF8` formatter, or the capability check.
1173//
1174// When the feature is disabled, none of the items below exist; the
1175// default `validate_address` and `format_mail_from` continue to enforce
1176// ASCII, as they always have.
1177
1178/// Return `true` if the EHLO capability lines advertise the `SMTPUTF8`
1179/// extension (RFC 6531). The check is case-insensitive on the keyword.
1180///
1181/// `capability_lines` is the slice of lines that follows the greeting in
1182/// an `EHLO` reply.
1183#[cfg(feature = "smtputf8")]
pub fn ehlo_advertises_smtputf8<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    // A line advertises the extension when its first whitespace-separated
    // token is SMTPUTF8 in any ASCII case; blank lines have no token and
    // are skipped.
    capability_lines
        .iter()
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("SMTPUTF8"))
}
1194
/// Validate an envelope address that may contain non-ASCII characters.
///
/// This is the SMTPUTF8 counterpart of [`validate_address`]. The checks
/// run in this order:
///
/// 1. The address must not be empty.
/// 2. Its UTF-8 encoded length must not exceed [`MAX_ADDRESS_LEN`].
/// 3. If it contains `@`, it is split at the last one; the local part
///    must not exceed [`MAX_LOCAL_PART_LEN`] octets and the domain must
///    not exceed [`MAX_DOMAIN_LEN`] octets. An address without `@` skips
///    this step.
/// 4. No character may be CR, LF, NUL, `<`, `>`, an ASCII space or tab,
///    another ASCII control character (C0 or DEL), or a C1 control
///    character (U+0080-U+009F). The first offending character decides
///    which error is reported.
///
/// Every other Unicode codepoint is accepted; the dot-atom structure is
/// left for the server to validate.
///
/// ASCII whitespace is refused because it would corrupt the SMTP command
/// framing. Other Unicode whitespace, such as U+3000 IDEOGRAPHIC SPACE,
/// is allowed: it is valid in mailbox local parts in some scripts and
/// the SMTP layer never tokenizes on it.
///
/// # Errors
///
/// Returns an [`InvalidInputError`] naming the first rule that failed.
#[cfg(feature = "smtputf8")]
pub fn validate_address_utf8(addr: &str) -> Result<(), InvalidInputError> {
    if addr.is_empty() {
        return Err(InvalidInputError::new("mail address must not be empty"));
    }

    // All limits are measured in octets: the UTF-8 encoded length is what
    // travels on the wire, so `str::len` is the right measure and a
    // character count would under-report multi-byte addresses.
    if addr.len() > MAX_ADDRESS_LEN {
        return Err(InvalidInputError::new(
            "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
        ));
    }

    // Split at the LAST '@' so the domain is whatever follows it.
    if let Some((local, domain)) = addr.rsplit_once('@') {
        if local.len() > MAX_LOCAL_PART_LEN {
            return Err(InvalidInputError::new(
                "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
            ));
        }
        if domain.len() > MAX_DOMAIN_LEN {
            return Err(InvalidInputError::new(
                "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
            ));
        }
    }

    // Arm order matters: CR, LF, NUL and tab are themselves C0 controls,
    // but get their own, more specific messages because they are matched
    // before the general control-character range.
    let violation = addr.chars().find_map(|ch| match ch {
        '\r' | '\n' => Some("mail address must not contain CR or LF"),
        '\0' => Some("mail address must not contain a NUL byte"),
        '<' | '>' => Some("mail address must not contain ASCII < or >"),
        ' ' | '\t' => Some("mail address must not contain ASCII whitespace"),
        '\u{01}'..='\u{1F}' | '\u{7F}' => {
            Some("mail address must not contain ASCII control characters")
        }
        '\u{80}'..='\u{9F}' => Some("mail address must not contain C1 control characters"),
        _ => None,
    });

    match violation {
        Some(message) => Err(InvalidInputError::new(message)),
        None => Ok(()),
    }
}
1278
1279/// Format `MAIL FROM:<addr> SMTPUTF8\r\n` as bytes.
1280///
1281/// The `SMTPUTF8` ESMTP parameter (RFC 6531 §3.4) signals to the
1282/// server that the upcoming envelope and message contain UTF-8.
1283/// Servers that did not advertise the extension will reject the
1284/// command; callers should confirm advertisement with
1285/// [`ehlo_advertises_smtputf8`] before invoking this helper.
1286///
1287/// Address validation is the caller's responsibility (use
1288/// [`validate_address_utf8`]); this helper formats unconditionally.
1289#[cfg(feature = "smtputf8")]
1290#[must_use]
pub fn format_mail_from_smtputf8(addr: &str) -> Vec<u8> {
    const OPENING: &[u8] = b"MAIL FROM:<";
    const CLOSING: &[u8] = b"> SMTPUTF8\r\n";
    // `addr` is spliced between the two fixed fragments byte-for-byte; no
    // validation or escaping happens here. `concat` sizes the output once
    // from the three pieces.
    [OPENING, addr.as_bytes(), CLOSING].concat()
}