Skip to main content

wasm_smtp_core/
protocol.rs

1//! SMTP wire-format helpers.
2//!
3//! This module is the home for all logic that touches SMTP bytes directly:
4//!
5//! - [`parse_reply_line`] interprets a single CRLF-stripped reply line.
6//! - [`Reply`] aggregates one or more lines into a complete reply.
7//! - [`format_command`] / [`format_command_arg`] produce CRLF-terminated
8//!   command bytes.
9//! - [`dot_stuff_and_terminate`] produces a complete DATA payload from a
10//!   user-supplied body, including the `\r\n.\r\n` terminator.
11//! - [`base64_encode`] is a small, dependency-free encoder used for
12//!   `AUTH LOGIN`. We do not need a decoder.
13//! - The `validate_*` functions reject caller input that would inject CRLF
14//!   sequences or otherwise violate SMTP grammar before any byte is sent.
15//!
16//! None of these helpers perform I/O; they operate on borrowed buffers and
17//! return owned bytes or errors.
18
19use crate::error::{InvalidInputError, ProtocolError};
20
/// Upper bound on the text of one reply line, not counting the CRLF.
///
/// RFC 5321 §4.5.3.1.5 limits a reply line to 512 octets including CRLF.
/// This crate is deliberately more tolerant: it accepts up to 998 octets
/// of text plus CRLF (the text-line limit of §4.5.3.1.6), because
/// real-world servers occasionally exceed the strict reply-line limit.
pub const MAX_REPLY_LINE_LEN: usize = 998;

/// Upper bound on the number of lines in one multi-line reply.
///
/// SMTP itself sets no hard cap; this defensive limit keeps a server
/// that never finishes its reply from forcing unbounded allocation.
pub const MAX_REPLY_LINES: usize = 128;

/// Upper bound on an envelope address, in octets (RFC 5321 §4.5.3.1.3).
///
/// The RFC's `Path` limit of 256 octets counts the `<` and `>` that
/// frame the address on the wire, so the bracket-free address that is
/// validated here may be at most 254 octets long.
pub const MAX_ADDRESS_LEN: usize = 254;

/// Upper bound on the local-part of an address, in octets
/// (RFC 5321 §4.5.3.1.1).
pub const MAX_LOCAL_PART_LEN: usize = 64;

/// Upper bound on the domain of an address, in octets
/// (RFC 5321 §4.5.3.1.2).
pub const MAX_DOMAIN_LEN: usize = 255;
47
48// -----------------------------------------------------------------------------
49// Reply parsing
50// -----------------------------------------------------------------------------
51
/// A single SMTP reply line after parsing, borrowing its text from the
/// input buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplyLine<'a> {
    /// Numeric value of the three leading digits.
    pub code: u16,
    /// `true` when the separator after the code was a space (or the line
    /// consisted of the code alone), marking the final line of a reply;
    /// `false` when the separator was `-`, marking a continuation line.
    pub is_last: bool,
    /// Everything after the separator. Empty when the line carries no
    /// text.
    pub text: &'a [u8],
}
63
64/// Parse a single CRLF-stripped reply line.
65///
66/// The input must not contain the terminating CRLF.
67pub fn parse_reply_line(line: &[u8]) -> Result<ReplyLine<'_>, ProtocolError> {
68    if line.len() < 3 {
69        return Err(malformed(line));
70    }
71    let d0 = ascii_digit_value(line[0]).ok_or_else(|| malformed(line))?;
72    let d1 = ascii_digit_value(line[1]).ok_or_else(|| malformed(line))?;
73    let d2 = ascii_digit_value(line[2]).ok_or_else(|| malformed(line))?;
74    let code = u16::from(d0) * 100 + u16::from(d1) * 10 + u16::from(d2);
75
76    if line.len() == 3 {
77        // RFC 5321 requires a separator, but a code-only line with no text
78        // and no separator is unambiguous: treat it as a last line.
79        return Ok(ReplyLine {
80            code,
81            is_last: true,
82            text: &[],
83        });
84    }
85    let (is_last, text) = match line[3] {
86        b' ' => (true, &line[4..]),
87        b'-' => (false, &line[4..]),
88        _ => return Err(malformed(line)),
89    };
90    Ok(ReplyLine {
91        code,
92        is_last,
93        text,
94    })
95}
96
/// Numeric value (0–9) of an ASCII digit byte, or `None` for any other
/// byte.
fn ascii_digit_value(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        _ => None,
    }
}
104
105fn malformed(line: &[u8]) -> ProtocolError {
106    ProtocolError::Malformed(String::from_utf8_lossy(line).into_owned())
107}
108
/// An enhanced status code from RFC 3463, parsed out of an SMTP reply
/// when the server has advertised the `ENHANCEDSTATUSCODES` extension
/// (RFC 2034).
///
/// Enhanced codes are formatted `class.subject.detail`, for example
/// `5.7.1` (relay access denied) or `4.7.0` (security feature
/// temporarily unavailable). The basic three-digit reply code (e.g.
/// `550`) and the enhanced code share the leading digit (the
/// "class"); the remaining two fields refine the diagnosis far
/// beyond what the basic code carries.
///
/// This type is preserved across the [`Reply`] on which it is parsed,
/// and reproduced in [`crate::ProtocolError::UnexpectedCode`] when an
/// unexpected reply triggers an error. Callers can use the structured
/// fields to make routing decisions ("if subject is 5.1.* the address
/// is permanently bad; if 4.x.x retry later").
///
/// Per RFC 3463 §2:
/// - `class` is one of 2, 4, or 5 (success / persistent transient /
///   permanent).
/// - `subject` and `detail` are 0–999 (one to three decimal digits on
///   the wire).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EnhancedStatus {
    /// Leading class digit (2, 4, or 5).
    pub class: u8,
    /// Second field: the broad subject category (e.g. `1` = address,
    /// `7` = security/policy).
    pub subject: u16,
    /// Third field: the specific detail within the subject.
    pub detail: u16,
}

impl EnhancedStatus {
    /// Format as `class.subject.detail`, the dotted-decimal wire form
    /// used by RFC 3463, without any zero padding.
    ///
    /// Produces exactly the same text as the [`Display`](core::fmt::Display)
    /// implementation.
    #[must_use]
    pub fn to_dotted(&self) -> String {
        // Delegate to `Display` so the two renderings cannot drift apart.
        self.to_string()
    }
}

impl core::fmt::Display for EnhancedStatus {
    /// Writes the code as `class.subject.detail`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "{}.{}.{}", self.class, self.subject, self.detail)
    }
}

/// Try to parse an [`EnhancedStatus`] from the start of a reply line's
/// text portion.
///
/// The expected format is `"x.y.z"` where `x` is `2`, `4`, or `5` and
/// `y` / `z` are one to three ASCII digits each (RFC 3463 §2). The code
/// must be followed by end-of-string, whitespace, or any other byte that
/// is neither a digit nor a dot. Anything else — missing dots, non-digit
/// characters, a field longer than three digits, a trailing `.`, or a
/// class digit other than `2`, `4`, `5` — returns `None`. The caller
/// advances past the parsed prefix only when this returns `Some`.
///
/// Returns `(status, prefix_len)` where `prefix_len` is the number of
/// bytes consumed from `text`, including a single trailing space or tab
/// if one immediately follows the code. This lets
/// [`Reply::message_text`] strip the code before showing the
/// user-facing message. `prefix_len` always falls on a UTF-8 character
/// boundary because only ASCII bytes are consumed.
fn parse_enhanced_status_prefix(text: &str) -> Option<(EnhancedStatus, usize)> {
    let bytes = text.as_bytes();

    // Class digit in {2,4,5} immediately followed by '.'.
    let class = match bytes {
        [digit @ (b'2' | b'4' | b'5'), b'.', ..] => digit - b'0',
        _ => return None,
    };

    // subject: 1–3 digits, terminated by '.'.
    let (subject, subject_end) = parse_status_field(bytes, 2)?;
    if bytes.get(subject_end) != Some(&b'.') {
        return None;
    }

    // detail: 1–3 digits.
    let (detail, detail_end) = parse_status_field(bytes, subject_end + 1)?;

    // Terminator: one space or tab is consumed so the user-facing message
    // starts cleanly. A '.' would mean a fourth field, which is not a
    // valid status code. Any other byte (or end of input) ends the code
    // without being consumed.
    let prefix_len = match bytes.get(detail_end) {
        Some(b' ' | b'\t') => detail_end + 1,
        Some(b'.') => return None,
        _ => detail_end,
    };

    Some((
        EnhancedStatus {
            class,
            subject,
            detail,
        },
        prefix_len,
    ))
}

/// Parse a run of one to three ASCII digits starting at `bytes[start]`.
///
/// Returns the numeric value and the index just past the last digit, or
/// `None` when there is no digit at `start` or the run is longer than
/// three digits.
fn parse_status_field(bytes: &[u8], start: usize) -> Option<(u16, usize)> {
    let digits = bytes
        .get(start..)?
        .iter()
        .take_while(|b| b.is_ascii_digit())
        .count();
    if digits == 0 || digits > 3 {
        return None;
    }
    let end = start + digits;
    // At most three digits, so the value is at most 999 and cannot overflow.
    let value = bytes[start..end]
        .iter()
        .fold(0u16, |acc, &b| acc * 10 + u16::from(b - b'0'));
    Some((value, end))
}
220
221/// A complete SMTP reply, possibly assembled from multiple continuation
222/// lines.
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct Reply {
225    /// The three-digit reply code, shared by every line of the reply.
226    pub code: u16,
227    /// One entry per line, in the order received. Each entry is the line's
228    /// text portion (after the code and separator) decoded as UTF-8 with
229    /// invalid sequences replaced by `U+FFFD`. The text retains any
230    /// enhanced status code prefix; use [`Self::message_text`] to obtain
231    /// the same text with the prefix stripped, or [`Self::enhanced`] to
232    /// obtain the parsed code itself.
233    pub lines: Vec<String>,
234    /// Parsed enhanced status code (RFC 3463), set only when the server
235    /// has advertised `ENHANCEDSTATUSCODES` for this session. The code
236    /// is taken from the first reply line; multi-line replies that
237    /// disagree on the code are flagged at parse time, so this is well
238    /// defined when present.
239    enhanced: Option<EnhancedStatus>,
240}
241
242impl Reply {
243    /// Construct a reply with the given code and lines, with no enhanced
244    /// status code attached. The client adds an enhanced code via the
245    /// internal `attach_enhanced_status` setter when the session has
246    /// `ENHANCEDSTATUSCODES` enabled.
247    #[must_use]
248    pub fn new(code: u16, lines: Vec<String>) -> Self {
249        Self {
250            code,
251            lines,
252            enhanced: None,
253        }
254    }
255
256    /// The leading digit of the reply code, useful for class-based checks.
257    pub fn class(&self) -> u8 {
258        u8::try_from(self.code / 100).unwrap_or(0)
259    }
260
261    /// Reply text concatenated with `\n`. Suitable for diagnostics.
262    /// If an enhanced status code prefix is present, it is preserved in
263    /// the output; use [`Self::message_text`] for a presentation that
264    /// hides it.
265    ///
266    /// # Caveat for log handlers
267    ///
268    /// The returned `String` may contain `\n` (used internally to
269    /// separate multi-line replies). It does **not** contain `\r` —
270    /// CRLF is stripped by the reply parser before storage — but
271    /// applications that forward this text to line-oriented loggers
272    /// (`syslog`, journald, structured JSON, etc.) should still
273    /// escape or render newlines explicitly to avoid log injection
274    /// where one logical reply renders as multiple log records. The
275    /// same caveat applies to anything else that consumes the
276    /// `Display` output of [`crate::ProtocolError`] or
277    /// [`crate::AuthError`], since those types embed reply text.
278    pub fn joined_text(&self) -> String {
279        self.lines.join("\n")
280    }
281
282    /// Reply text with any enhanced status code prefix stripped from
283    /// each line. Suitable for human-facing error messages where the
284    /// code is shown separately. Lines that have no enhanced prefix
285    /// are returned unchanged.
286    pub fn message_text(&self) -> String {
287        if self.enhanced.is_none() {
288            return self.joined_text();
289        }
290        let stripped: Vec<&str> = self
291            .lines
292            .iter()
293            .map(|line| match parse_enhanced_status_prefix(line) {
294                Some((_, prefix_len)) => &line[prefix_len..],
295                None => line.as_str(),
296            })
297            .collect();
298        stripped.join("\n")
299    }
300
301    /// Parsed enhanced status code, if the server has provided one and
302    /// the session has it enabled.
303    #[must_use]
304    pub fn enhanced(&self) -> Option<EnhancedStatus> {
305        self.enhanced
306    }
307
308    /// Set the enhanced status code on this reply. Used by the client
309    /// after the EHLO capability set has been confirmed to include
310    /// `ENHANCEDSTATUSCODES`.
311    pub(crate) fn attach_enhanced_status(&mut self, status: EnhancedStatus) {
312        self.enhanced = Some(status);
313    }
314
315    /// Iterate over the trimmed text of each line. Useful for parsing EHLO
316    /// capabilities, where the first line contains the greeting and the
317    /// remaining lines each name a single capability (e.g. `AUTH LOGIN`,
318    /// `PIPELINING`, `8BITMIME`).
319    pub fn iter_lines(&self) -> impl Iterator<Item = &str> {
320        self.lines.iter().map(String::as_str)
321    }
322
323    /// Parse an enhanced status code from the first line's text, if
324    /// present. Used by the client to populate `self.enhanced` only when
325    /// the session has `ENHANCEDSTATUSCODES` enabled.
326    #[must_use]
327    pub fn try_parse_enhanced(&self) -> Option<EnhancedStatus> {
328        self.lines
329            .first()
330            .and_then(|line| parse_enhanced_status_prefix(line).map(|(s, _)| s))
331    }
332}
333
334// -----------------------------------------------------------------------------
335// Command formatting
336// -----------------------------------------------------------------------------
337
/// Render an argument-less command as wire bytes: the verb followed by
/// CRLF.
///
/// For instance `format_command("QUIT")` gives `b"QUIT\r\n"`. The verb
/// is emitted verbatim; no validation or escaping is performed.
pub fn format_command(verb: &str) -> Vec<u8> {
    format!("{}\r\n", verb).into_bytes()
}
347
/// Render a one-argument command as wire bytes: verb, a single space,
/// the argument, then CRLF.
///
/// For instance `format_command_arg("EHLO", "client.example.com")` gives
/// `b"EHLO client.example.com\r\n"`.
///
/// The argument is emitted verbatim — nothing is escaped — so the caller
/// must have validated it beforehand.
pub fn format_command_arg(verb: &str, arg: &str) -> Vec<u8> {
    format!("{} {}\r\n", verb, arg).into_bytes()
}
363
/// Render `MAIL FROM:<addr>\r\n` as wire bytes.
///
/// `addr` is inserted verbatim between the angle brackets, so it must
/// already have been validated by the caller.
pub fn format_mail_from(addr: &str) -> Vec<u8> {
    [&b"MAIL FROM:<"[..], addr.as_bytes(), &b">\r\n"[..]].concat()
}
372
/// Render `RCPT TO:<addr>\r\n` as wire bytes.
///
/// `addr` is inserted verbatim between the angle brackets, so it must
/// already have been validated by the caller.
pub fn format_rcpt_to(addr: &str) -> Vec<u8> {
    [&b"RCPT TO:<"[..], addr.as_bytes(), &b">\r\n"[..]].concat()
}
381
382// -----------------------------------------------------------------------------
383// DATA payload
384// -----------------------------------------------------------------------------
385
/// Turn a message body into the byte stream sent during the DATA phase.
///
/// The result is built in three steps:
///
/// 1. every `.` that starts a line (at the very beginning of the body or
///    right after a `\r\n`) is doubled, as RFC 5321 §4.5.2 requires;
/// 2. a `\r\n` is appended unless the stuffed body already ends with
///    one;
/// 3. the end-of-data marker `.\r\n` is appended.
///
/// The body is expected to use CRLF line endings. A lone `\n` or `\r`
/// is neither translated nor treated as a line break; callers that need
/// normalization must do it before calling this function.
///
/// Only `\r`, `\n`, and `.` are looked at, so the body may carry any
/// 8-bit data the server is willing to accept (for example after it
/// advertised `8BITMIME`).
pub fn dot_stuff_and_terminate(body: &[u8]) -> Vec<u8> {
    const CRLF: [u8; 2] = *b"\r\n";

    let mut stuffed = Vec::with_capacity(body.len() + 8);
    // Sliding window over the last two input bytes. Seeding it with CRLF
    // makes the first byte of the body count as the start of a line.
    let mut last_two = CRLF;
    for &byte in body {
        if byte == b'.' && last_two == CRLF {
            stuffed.push(b'.');
        }
        stuffed.push(byte);
        last_two = [last_two[1], byte];
    }
    if !stuffed.ends_with(&CRLF) {
        stuffed.extend_from_slice(&CRLF);
    }
    stuffed.extend_from_slice(b".\r\n");
    stuffed
}
420
421// -----------------------------------------------------------------------------
422// Base64
423// -----------------------------------------------------------------------------
424
/// The 64-character alphabet of standard base64 (RFC 4648 §4).
const BASE64_ALPHABET: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode `input` as standard, `=`-padded base64 (RFC 4648).
///
/// This small encoder exists so that `AUTH LOGIN` works without an
/// external base64 dependency; it is short enough to audit by eye.
pub fn base64_encode(input: &[u8]) -> String {
    let mut encoded = String::with_capacity(input.len().div_ceil(3) * 4);
    for group in input.chunks(3) {
        // Pack up to three octets into the top 24 bits of a word; missing
        // octets of a short final group stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (pos, &octet)| {
                acc | (u32::from(octet) << (16 - 8 * pos))
            });
        // n input octets yield n + 1 significant characters; the rest of
        // the four-character quantum is padding.
        let significant: u8 = match group.len() {
            1 => 2,
            2 => 3,
            _ => 4,
        };
        push_b64(&mut encoded, word, significant);
        for _ in significant..4 {
            encoded.push('=');
        }
    }
    encoded
}

/// Append the first `count` base64 characters (2..=4) of the 24-bit
/// group `n` to `out`, most significant sextet first.
fn push_b64(out: &mut String, n: u32, count: u8) {
    // Sextet positions: index 0 -> bits 23..18, 1 -> 17..12, 2 -> 11..6,
    // 3 -> 5..0.
    out.extend((0..count).map(|index| {
        let sextet = (n >> (18 - 6 * index)) & 0x3F;
        char::from(BASE64_ALPHABET[sextet as usize])
    }));
}
466
467// -----------------------------------------------------------------------------
468// Input validation
469// -----------------------------------------------------------------------------
470
471/// Validate a mail address (RFC 5321 reverse-path / forward-path content).
472///
473/// The check is intentionally conservative: it rejects the characters that
474/// would either inject SMTP commands or violate the framing of `<addr>`.
475/// Validate an envelope address (used in MAIL FROM / RCPT TO) against
476/// RFC 5321 grammar and the length limits in §4.5.3.1.
477///
478/// The check is conservative — it does not parse RFC 5321 grammar in
479/// detail, but it forbids any byte that would corrupt the command
480/// framing, and rejects values that exceed the standard's per-field
481/// length limits.
482///
483/// In particular:
484///
485/// - non-empty;
486/// - ASCII only — UTF-8 addresses require the `smtputf8` feature
487///   (which exposes a separate UTF-8-permissive validator);
488/// - no `\r`, `\n`, or `\0`;
489/// - no `<`, `>`, or space (which would corrupt the angle-bracket framing);
490/// - the whole address (local-part + `@` + domain) must be no longer
491///   than 254 octets — RFC 5321 §4.5.3.1.3 specifies 256 for the
492///   `Path` token including angle brackets, leaving 254 for the
493///   bracket-stripped address;
494/// - if an `@` is present, the local-part is no longer than 64 octets
495///   and the domain is no longer than 255 octets (§4.5.3.1.1 /
496///   §4.5.3.1.2). These limits are advisory: many real-world relays
497///   accept longer values, but rejecting at the client boundary
498///   prevents a misformed input from generating a wire `MAIL FROM`
499///   line that exceeds the SMTP line-length limit (§4.5.3.1.5).
500pub fn validate_address(addr: &str) -> Result<(), InvalidInputError> {
501    if addr.is_empty() {
502        return Err(InvalidInputError::new("mail address must not be empty"));
503    }
504    if !addr.is_ascii() {
505        return Err(InvalidInputError::new(
506            "mail address must be ASCII (SMTPUTF8 is not supported)",
507        ));
508    }
509    if addr.len() > MAX_ADDRESS_LEN {
510        return Err(InvalidInputError::new(
511            "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
512        ));
513    }
514    if let Some(at_pos) = addr.rfind('@') {
515        let (local, domain) = addr.split_at(at_pos);
516        // domain still has the leading '@' — strip it.
517        let domain = &domain[1..];
518        if local.len() > MAX_LOCAL_PART_LEN {
519            return Err(InvalidInputError::new(
520                "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
521            ));
522        }
523        if domain.len() > MAX_DOMAIN_LEN {
524            return Err(InvalidInputError::new(
525                "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
526            ));
527        }
528    }
529    for b in addr.bytes() {
530        match b {
531            b'\r' | b'\n' => {
532                return Err(InvalidInputError::new(
533                    "mail address must not contain CR or LF",
534                ));
535            }
536            0 => {
537                return Err(InvalidInputError::new(
538                    "mail address must not contain a NUL byte",
539                ));
540            }
541            b'<' | b'>' => {
542                return Err(InvalidInputError::new(
543                    "mail address must not contain '<' or '>'",
544                ));
545            }
546            b' ' | b'\t' => {
547                return Err(InvalidInputError::new(
548                    "mail address must not contain whitespace",
549                ));
550            }
551            _ => {}
552        }
553    }
554    Ok(())
555}
556
557/// Validate the domain argument supplied to `EHLO`.
558///
559/// Accepts any non-empty sequence of printable ASCII (0x21..=0x7E). Address
560/// literals (e.g. `[192.0.2.1]`) and dotted FQDNs both pass. The check is
561/// intentionally lenient: its job is to prevent CRLF injection, not to
562/// enforce DNS syntax.
563pub fn validate_ehlo_domain(domain: &str) -> Result<(), InvalidInputError> {
564    if domain.is_empty() {
565        return Err(InvalidInputError::new("EHLO domain must not be empty"));
566    }
567    if !domain.is_ascii() {
568        return Err(InvalidInputError::new("EHLO domain must be ASCII"));
569    }
570    if domain.bytes().any(|b| !(0x21..=0x7E).contains(&b)) {
571        return Err(InvalidInputError::new(
572            "EHLO domain must contain only printable ASCII characters",
573        ));
574    }
575    Ok(())
576}
577
578/// Validate the username supplied to `AUTH LOGIN`.
579///
580/// As of v0.5.0 this is a thin alias for [`validate_plain_username`]:
581/// the two SASL mechanisms (PLAIN and LOGIN) accept the same shape
582/// of credential string and the same constraints apply. NUL bytes
583/// are rejected because they would corrupt the SASL framing on the
584/// post-base64 server side.
585///
586/// The function is retained for source compatibility with v0.4.x
587/// callers, but new code should use [`validate_plain_username`]
588/// directly. A future major release may remove this alias.
589pub fn validate_login_username(user: &str) -> Result<(), InvalidInputError> {
590    validate_plain_username(user)
591}
592
593/// Validate the password supplied to `AUTH LOGIN`.
594///
595/// As of v0.5.0 this is a thin alias for [`validate_plain_password`].
596/// See [`validate_login_username`] for the rationale.
597pub fn validate_login_password(pass: &str) -> Result<(), InvalidInputError> {
598    validate_plain_password(pass)
599}
600
601// -----------------------------------------------------------------------------
602// EHLO capability inspection
603// -----------------------------------------------------------------------------
604
/// Report whether any EHLO capability line advertises `mechanism` as an
/// `AUTH` mechanism. Both the `AUTH` keyword and the mechanism name are
/// compared ASCII case-insensitively.
///
/// `capability_lines` holds the lines that follow the greeting in an
/// `EHLO` reply, one extension per line (e.g. `"AUTH LOGIN PLAIN"`,
/// `"PIPELINING"`, `"8BITMIME"`). Lines whose first whitespace-separated
/// word is not `AUTH` are ignored, as are empty lines.
pub fn ehlo_advertises_auth<S: AsRef<str>>(capability_lines: &[S], mechanism: &str) -> bool {
    capability_lines.iter().any(|line| {
        let mut words = line.as_ref().split_ascii_whitespace();
        let is_auth_line = words
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("AUTH"));
        // Whatever follows the keyword is the list of mechanisms.
        is_auth_line && words.any(|offered| offered.eq_ignore_ascii_case(mechanism))
    })
}
627
/// Report whether any EHLO capability line advertises the `STARTTLS`
/// extension (RFC 3207). The keyword is compared ASCII
/// case-insensitively.
///
/// `capability_lines` holds the lines that follow the greeting in an
/// `EHLO` reply. Only the first whitespace-separated word of each line
/// (the extension keyword) is examined; any parameters are ignored.
pub fn ehlo_advertises_starttls<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines
        .iter()
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("STARTTLS"))
}
644
/// Report whether any EHLO capability line advertises the
/// `ENHANCEDSTATUSCODES` extension (RFC 2034). The keyword is compared
/// ASCII case-insensitively, and only the first whitespace-separated
/// word of each line is examined.
///
/// When this is `true` for a session, the SMTP client parses the
/// `class.subject.detail` prefix off each reply and exposes it as
/// [`EnhancedStatus`] both on the [`Reply`] itself and on
/// [`crate::ProtocolError::UnexpectedCode`]. When the keyword is not
/// advertised, the same byte sequence in a reply (a stray "5.1.1" for
/// instance) stays in the message text and is not parsed.
pub fn ehlo_advertises_enhanced_status_codes<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines
        .iter()
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("ENHANCEDSTATUSCODES"))
}
665
666// -----------------------------------------------------------------------------
667// Authentication mechanisms
668// -----------------------------------------------------------------------------
669
/// The SASL authentication mechanisms this client knows about.
///
/// The variants cover `PLAIN` (RFC 4616), `LOGIN` (the historical
/// mechanism used by many submission servers), and `XOAUTH2`. The enum
/// is `non_exhaustive` so that further mechanisms (e.g.
/// `SCRAM-SHA-256`) can be added without a major version bump.
///
/// `PLAIN` is preferred over `LOGIN` when a server advertises both: it
/// needs one network round-trip instead of two and is an IETF-standard
/// SASL mechanism. `LOGIN` remains for older submission servers that
/// advertise nothing else.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum AuthMechanism {
    /// SASL `PLAIN` (RFC 4616). The initial response is `\0user\0pass`,
    /// base64-encoded, so authentication completes in one round-trip.
    Plain,
    /// `LOGIN`. Username and password travel as two separate base64
    /// lines, each answering a `334` prompt from the server.
    Login,
    /// SASL `XOAUTH2` (Google / Microsoft OAuth 2.0 SMTP extension).
    /// The initial response is
    /// `user={user}\x01auth=Bearer {token}\x01\x01`, base64-encoded.
    /// The "credential" given to `login_with` for this mechanism is an
    /// OAuth 2.0 access token rather than a static password, which is
    /// why auto-selection by `login()` deliberately does NOT pick it.
    XOAuth2,
}

impl AuthMechanism {
    /// The keyword that identifies this mechanism on the wire, i.e. the
    /// word that follows `AUTH` in an `EHLO` advertisement (`"PLAIN"`,
    /// `"LOGIN"`, or `"XOAUTH2"`).
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            AuthMechanism::Plain => "PLAIN",
            AuthMechanism::Login => "LOGIN",
            AuthMechanism::XOAuth2 => "XOAUTH2",
        }
    }
}

impl core::fmt::Display for AuthMechanism {
    /// Writes the wire keyword returned by [`AuthMechanism::name`].
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let keyword = self.name();
        f.write_str(keyword)
    }
}
718
719/// Pick the best mechanism advertised by the server, preferring `PLAIN`
720/// over `LOGIN`. Returns `None` if the server advertised neither.
721///
722/// Use this when you want a single `login` call to do the right thing
723/// across the variety of submission servers in deployment. If you need
724/// to lock in a specific mechanism (for example, to reproduce a
725/// production failure in a test), call [`crate::client::SmtpClient::login_with`]
726/// directly.
727pub fn select_auth_mechanism<S: AsRef<str>>(capability_lines: &[S]) -> Option<AuthMechanism> {
728    if ehlo_advertises_auth(capability_lines, "PLAIN") {
729        Some(AuthMechanism::Plain)
730    } else if ehlo_advertises_auth(capability_lines, "LOGIN") {
731        Some(AuthMechanism::Login)
732    } else {
733        None
734    }
735}
736
737/// Build the SASL `PLAIN` initial response for the given credentials.
738///
739/// The result is the base64 encoding of `\0user\0pass` (RFC 4616 §2).
740/// The empty authorization identity (the part before the first NUL)
741/// means "act as the authenticated user", which is the correct default
742/// for SMTP submission.
743///
744/// The caller is responsible for the surrounding command framing; the
745/// full on-wire bytes are `b"AUTH PLAIN " + result + b"\r\n"`.
746///
747/// # Encoding
748///
749/// `user` and `pass` are encoded as their UTF-8 bytes. RFC 4616 mandates
750/// UTF-8 for both fields; this matches Rust's `String` representation.
751#[must_use]
752pub fn build_auth_plain_initial_response(user: &str, pass: &str) -> String {
753    let mut payload = Vec::with_capacity(2 + user.len() + pass.len());
754    payload.push(0u8); // empty authzid
755    payload.extend_from_slice(user.as_bytes());
756    payload.push(0u8);
757    payload.extend_from_slice(pass.as_bytes());
758    base64_encode(&payload)
759}
760
761/// Validate the username supplied to a SASL `PLAIN` `AUTH` exchange.
762///
763/// RFC 4616 forbids NUL bytes in the authcid (NUL is the field
764/// separator). Empty usernames are also refused: while RFC 4616 itself
765/// allows them, no SMTP submission server accepts an empty login, and
766/// rejecting them up-front turns a server-side failure into a
767/// programmer-visible one.
768pub fn validate_plain_username(user: &str) -> Result<(), InvalidInputError> {
769    if user.is_empty() {
770        return Err(InvalidInputError::new("AUTH username must not be empty"));
771    }
772    if user.bytes().any(|b| b == 0) {
773        return Err(InvalidInputError::new(
774            "AUTH username must not contain a NUL byte",
775        ));
776    }
777    Ok(())
778}
779
780/// Validate the password supplied to a SASL `PLAIN` `AUTH` exchange.
781///
782/// As with [`validate_plain_username`], NUL bytes are forbidden because
783/// they would corrupt the SASL framing.
784pub fn validate_plain_password(pass: &str) -> Result<(), InvalidInputError> {
785    if pass.is_empty() {
786        return Err(InvalidInputError::new("AUTH password must not be empty"));
787    }
788    if pass.bytes().any(|b| b == 0) {
789        return Err(InvalidInputError::new(
790            "AUTH password must not contain a NUL byte",
791        ));
792    }
793    Ok(())
794}
795
796// -----------------------------------------------------------------------------
797// XOAUTH2 (Google / Microsoft OAuth 2.0 SASL profile)
798//
799// The three helpers in this section are feature-gated behind
800// `xoauth2` (default-on). Disabling the feature removes them
801// entirely along with the corresponding `SmtpClient::login_xoauth2`
802// method and the `XOAuth2` arm of `login_with`. The
803// `AuthMechanism::XOAuth2` and `SmtpOp::AuthXOAuth2` enum variants
// remain present in either configuration; both enums are
// `non_exhaustive`, and with the feature disabled those variants are
// simply unreachable through the public API.
807// -----------------------------------------------------------------------------
808
/// Build the SASL `XOAUTH2` initial response.
///
/// The wire format, before base64, is:
///
/// ```text
/// user={user}\x01auth=Bearer {token}\x01\x01
/// ```
///
/// where `\x01` is the SOH (Ctrl-A) byte that separates fields. The
/// `Bearer ` prefix is fixed and case-sensitive. Both the user and the
/// token are passed through verbatim; the caller must have validated
/// them with [`validate_xoauth2_user`] and [`validate_oauth2_token`]
/// first.
///
/// The returned string is the base64 encoding of the entire payload,
/// suitable for placement after `AUTH XOAUTH2 ` on the wire. The
/// caller is responsible for the surrounding command framing.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
#[cfg(feature = "xoauth2")]
#[must_use]
pub fn build_xoauth2_initial_response(user: &str, token: &str) -> String {
    const USER_PREFIX: &[u8] = b"user=";
    const AUTH_PREFIX: &[u8] = b"auth=Bearer ";
    const SOH: u8 = 0x01;
    // One SOH closes the user field and two terminate the payload.
    const SOH_COUNT: usize = 3;

    // Derive the exact size from the literals instead of a hand-counted
    // constant: the fixed overhead is 5 + 12 + 3 = 20 bytes, and
    // reserving fewer forces a reallocation on the final push.
    let capacity = USER_PREFIX.len() + user.len() + AUTH_PREFIX.len() + token.len() + SOH_COUNT;
    let mut payload = Vec::with_capacity(capacity);
    payload.extend_from_slice(USER_PREFIX);
    payload.extend_from_slice(user.as_bytes());
    payload.push(SOH);
    payload.extend_from_slice(AUTH_PREFIX);
    payload.extend_from_slice(token.as_bytes());
    payload.extend_from_slice(&[SOH, SOH]);
    base64_encode(&payload)
}
843
/// Check a username before it is used in a SASL `XOAUTH2` exchange.
///
/// XOAUTH2 (Google / Microsoft) does not formally constrain the user
/// field. To keep the SASL payload well-formed, this validator refuses
/// an empty user and any user containing NUL, CR, LF, or the SOH
/// (`0x01`) field separator.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
///
/// # Errors
///
/// Returns [`InvalidInputError`] when `user` is empty or contains one of
/// the forbidden bytes; the emptiness check runs first.
#[cfg(feature = "xoauth2")]
pub fn validate_xoauth2_user(user: &str) -> Result<(), InvalidInputError> {
    // NUL, SOH (the XOAUTH2 field separator), LF and CR.
    const FORBIDDEN: [u8; 4] = [0x00, 0x01, b'\n', b'\r'];

    if user.is_empty() {
        return Err(InvalidInputError::new("XOAUTH2 user must not be empty"));
    }
    let has_forbidden_byte = user.bytes().any(|byte| FORBIDDEN.contains(&byte));
    if has_forbidden_byte {
        Err(InvalidInputError::new(
            "XOAUTH2 user must not contain NUL, CR, LF, or SOH",
        ))
    } else {
        Ok(())
    }
}
864
/// Validate an OAuth 2.0 access token before sending it on the wire.
///
/// The token must be non-empty and every byte must be a visible ASCII
/// character, i.e. in the range `0x21..=0x7E`. That rules out space
/// (`0x20`), tab (`0x09`), every other control byte (including the SOH
/// separator used by XOAUTH2), DEL, and all non-ASCII bytes.
///
/// The `b64token` alphabet of RFC 6750 §2.1 (`[A-Za-z0-9._~+/=-]`) lies
/// entirely inside the accepted range, so tokens of that shape pass
/// this check. Tokens containing whitespace are rejected, which is
/// deliberately conservative.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
///
/// # Errors
///
/// Returns [`InvalidInputError`] when `token` is empty or contains a
/// byte outside `0x21..=0x7E`.
#[cfg(feature = "xoauth2")]
pub fn validate_oauth2_token(token: &str) -> Result<(), InvalidInputError> {
    if token.is_empty() {
        return Err(InvalidInputError::new(
            "OAuth2 access token must not be empty",
        ));
    }
    // `is_ascii_graphic` accepts exactly 0x21..=0x7E: printable ASCII
    // without the space character.
    if token.bytes().all(|byte| byte.is_ascii_graphic()) {
        Ok(())
    } else {
        Err(InvalidInputError::new(
            "OAuth2 access token must contain only printable ASCII (no whitespace or control bytes)",
        ))
    }
}
898
899// -----------------------------------------------------------------------------
900// SMTPUTF8 (RFC 6531) — feature-gated
901// -----------------------------------------------------------------------------
902//
903// SMTPUTF8 lets a session carry mail addresses outside the ASCII
904// repertoire — e.g. `送信者@例え.jp`. The crate gates the related
905// helpers behind the `smtputf8` cargo feature: callers who only ever
906// submit ASCII addresses pay no code-size cost for the UTF-8 validator,
907// the `MAIL FROM ... SMTPUTF8` formatter, or the capability check.
908//
909// When the feature is disabled, none of the items below exist; the
910// default `validate_address` and `format_mail_from` continue to enforce
911// ASCII, as they always have.
912
/// Report whether an `EHLO` reply advertises the `SMTPUTF8` extension
/// (RFC 6531).
///
/// `capability_lines` is the slice of lines that follows the greeting in
/// an `EHLO` reply. A line counts as an advertisement when its first
/// whitespace-delimited word equals `SMTPUTF8`, compared without regard
/// to ASCII case. Blank lines and lines whose first word merely starts
/// with the keyword do not match.
#[cfg(feature = "smtputf8")]
pub fn ehlo_advertises_smtputf8<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines.iter().any(|line| {
        line.as_ref()
            .split_ascii_whitespace()
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("SMTPUTF8"))
    })
}
929
/// Validate an envelope address, allowing UTF-8 codepoints in addition
/// to the ASCII subset accepted by [`validate_address`].
///
/// The checks run in this order:
///
/// 1. The address must not be empty.
/// 2. Its UTF-8 encoded length must not exceed [`MAX_ADDRESS_LEN`]. If
///    it contains an `@`, the text before the last `@` must fit in
///    `MAX_LOCAL_PART_LEN` octets and the text after it in
///    `MAX_DOMAIN_LEN` octets.
/// 3. No character may be CR, LF, NUL, `<`, `>`, an ASCII space or tab,
///    any other ASCII control character (C0 + DEL), or a C1 control
///    character (U+0080-U+009F). The first offending character decides
///    which error is reported.
///
/// Any other Unicode codepoint is permitted; the dot-atom structure is
/// left for the server to validate.
///
/// ASCII whitespace is rejected because it would corrupt the SMTP
/// command framing. Other Unicode whitespace, such as U+3000
/// IDEOGRAPHIC SPACE, is allowed: the SMTP layer never tokenizes on it.
///
/// # Errors
///
/// Returns [`InvalidInputError`] describing the first failed check.
#[cfg(feature = "smtputf8")]
pub fn validate_address_utf8(addr: &str) -> Result<(), InvalidInputError> {
    if addr.is_empty() {
        return Err(InvalidInputError::new("mail address must not be empty"));
    }

    // All limits are octet counts: `str::len` is the UTF-8 byte length,
    // which is what actually travels on the wire.
    if addr.len() > MAX_ADDRESS_LEN {
        return Err(InvalidInputError::new(
            "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
        ));
    }
    // Split on the last '@'. An address without one skips the per-part
    // limits and is only subject to the overall limit above.
    if let Some((local_part, domain_part)) = addr.rsplit_once('@') {
        if local_part.len() > MAX_LOCAL_PART_LEN {
            return Err(InvalidInputError::new(
                "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
            ));
        }
        if domain_part.len() > MAX_DOMAIN_LEN {
            return Err(InvalidInputError::new(
                "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
            ));
        }
    }

    // Classify each character; the first one that is not allowed
    // determines the error message.
    let first_violation = addr.chars().find_map(|ch| match ch {
        '\r' | '\n' => Some("mail address must not contain CR or LF"),
        '\0' => Some("mail address must not contain a NUL byte"),
        '<' | '>' => Some("mail address must not contain ASCII < or >"),
        ' ' | '\t' => Some("mail address must not contain ASCII whitespace"),
        // Remaining C0 controls and DEL; CR, LF, NUL and tab were
        // already matched by the arms above.
        _ if ch.is_ascii_control() => {
            Some("mail address must not contain ASCII control characters")
        }
        // C1 control characters.
        '\u{80}'..='\u{9F}' => Some("mail address must not contain C1 control characters"),
        _ => None,
    });

    match first_violation {
        Some(message) => Err(InvalidInputError::new(message)),
        None => Ok(()),
    }
}
1013
/// Produce the bytes of `MAIL FROM:<addr> SMTPUTF8\r\n`.
///
/// The `SMTPUTF8` ESMTP parameter (RFC 6531 §3.4) tells the server that
/// the upcoming envelope and message contain UTF-8. A server that did
/// not advertise the extension will reject the command, so callers
/// should confirm advertisement with [`ehlo_advertises_smtputf8`]
/// before using this helper.
///
/// `addr` is copied between the angle brackets verbatim. No validation
/// happens here; that is the caller's responsibility (use
/// [`validate_address_utf8`]).
#[cfg(feature = "smtputf8")]
#[must_use]
pub fn format_mail_from_smtputf8(addr: &str) -> Vec<u8> {
    // `concat` sizes the output from the summed piece lengths, so the
    // command is assembled with a single allocation.
    let pieces: [&[u8]; 3] = [b"MAIL FROM:<", addr.as_bytes(), b"> SMTPUTF8\r\n"];
    pieces.concat()
}