wasm_smtp/protocol.rs
1//! SMTP wire-format helpers.
2//!
3//! This module is the home for all logic that touches SMTP bytes directly:
4//!
5//! - [`parse_reply_line`] interprets a single CRLF-stripped reply line.
6//! - [`Reply`] aggregates one or more lines into a complete reply.
7//! - [`format_command`] / [`format_command_arg`] produce CRLF-terminated
8//! command bytes.
9//! - [`dot_stuff_and_terminate`] produces a complete DATA payload from a
10//! user-supplied body, including the `\r\n.\r\n` terminator.
//! - [`base64_encode`] / [`base64_decode`] are small, dependency-free
//!   base64 routines used for `AUTH LOGIN` and for decoding SCRAM messages.
13//! - The `validate_*` functions reject caller input that would inject CRLF
14//! sequences or otherwise violate SMTP grammar before any byte is sent.
15//!
16//! None of these helpers perform I/O; they operate on borrowed buffers and
17//! return owned bytes or errors.
18
19use crate::error::{InvalidInputError, ProtocolError};
20
/// Maximum length of a single reply line, excluding CRLF.
///
/// RFC 5321 §4.5.3.1.5 sets a 512-octet limit for reply lines including
/// CRLF. We accept up to 998 octets of text plus CRLF (the body line limit
/// from §4.5.3.1.6) to be lenient toward real-world server software that
/// occasionally exceeds the strict reply-line limit.
pub const MAX_REPLY_LINE_LEN: usize = 998;

/// Maximum number of lines accepted in a single multi-line reply.
///
/// SMTP does not specify a hard cap, but a reasonable defensive limit
/// prevents an unbounded server from causing unbounded allocation.
pub const MAX_REPLY_LINES: usize = 128;

/// Maximum length of an envelope address (RFC 5321 §4.5.3.1.3).
///
/// The standard's `Path` limit is 256 octets, including the angle
/// brackets that frame the address on the wire. With brackets
/// stripped, the validated address may be at most 254 octets.
pub const MAX_ADDRESS_LEN: usize = 254;

/// Maximum length of an address local-part (RFC 5321 §4.5.3.1.1),
/// i.e. the portion before the last `@`.
pub const MAX_LOCAL_PART_LEN: usize = 64;

/// Maximum length of an address domain (RFC 5321 §4.5.3.1.2),
/// i.e. the portion after the last `@`.
///
/// Note that this is larger than [`MAX_ADDRESS_LEN`], so for a complete
/// address the whole-address limit is always the tighter bound.
pub const MAX_DOMAIN_LEN: usize = 255;
47
48// -----------------------------------------------------------------------------
49// Reply parsing
50// -----------------------------------------------------------------------------
51
/// One parsed line of an SMTP reply.
///
/// The `text` field borrows from the buffer that was parsed, so a
/// `ReplyLine` cannot outlive the line it was produced from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplyLine<'a> {
    /// The three-digit reply code.
    pub code: u16,
    /// `true` if the code was followed by a space (last line of a
    /// reply); `false` if it was followed by `-` (continuation).
    pub is_last: bool,
    /// The text portion after the separator. May be empty.
    pub text: &'a [u8],
}
63
64/// Parse a single CRLF-stripped reply line.
65///
66/// The input must not contain the terminating CRLF.
67pub fn parse_reply_line(line: &[u8]) -> Result<ReplyLine<'_>, ProtocolError> {
68 if line.len() < 3 {
69 return Err(malformed(line));
70 }
71 let d0 = ascii_digit_value(line[0]).ok_or_else(|| malformed(line))?;
72 let d1 = ascii_digit_value(line[1]).ok_or_else(|| malformed(line))?;
73 let d2 = ascii_digit_value(line[2]).ok_or_else(|| malformed(line))?;
74 let code = u16::from(d0) * 100 + u16::from(d1) * 10 + u16::from(d2);
75
76 if line.len() == 3 {
77 // RFC 5321 requires a separator, but a code-only line with no text
78 // and no separator is unambiguous: treat it as a last line.
79 return Ok(ReplyLine {
80 code,
81 is_last: true,
82 text: &[],
83 });
84 }
85 let (is_last, text) = match line[3] {
86 b' ' => (true, &line[4..]),
87 b'-' => (false, &line[4..]),
88 _ => return Err(malformed(line)),
89 };
90 Ok(ReplyLine {
91 code,
92 is_last,
93 text,
94 })
95}
96
/// Numeric value (0–9) of an ASCII digit byte, or `None` for any other byte.
fn ascii_digit_value(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        _ => None,
    }
}
104
105fn malformed(line: &[u8]) -> ProtocolError {
106 ProtocolError::Malformed(String::from_utf8_lossy(line).into_owned())
107}
108
/// An enhanced status code from RFC 3463, parsed out of an SMTP reply
/// when the server has advertised the `ENHANCEDSTATUSCODES` extension
/// (RFC 2034).
///
/// Enhanced codes are written `class.subject.detail`, for example
/// `5.7.1` (relay access denied) or `4.7.0` (security feature
/// temporarily unavailable). The basic three-digit reply code (e.g.
/// `550`) and the enhanced code share the leading digit (the "class");
/// the other two fields refine the diagnosis well beyond what the basic
/// code carries.
///
/// The value travels with the [`Reply`] it was parsed from and is
/// reproduced in [`crate::ProtocolError::UnexpectedCode`] when an
/// unexpected reply triggers an error, so callers can route on the
/// structured fields ("`5.1.*` means the address is permanently bad;
/// `4.x.x` means retry later").
///
/// Per RFC 3463 §2:
/// - `class` is one of 2, 4, or 5 (success / persistent transient /
///   permanent).
/// - `subject` and `detail` are 0–999.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EnhancedStatus {
    /// Leading class digit (2, 4, or 5).
    pub class: u8,
    /// Second field: the broad subject category (e.g. `1` = address,
    /// `7` = security/policy).
    pub subject: u16,
    /// Third field: the specific detail within the subject.
    pub detail: u16,
}

impl EnhancedStatus {
    /// Render as `class.subject.detail`: dot-separated decimal fields
    /// with no padding, which is the wire form RFC 3463 uses.
    ///
    /// Produces exactly the same text as the `Display` implementation.
    #[must_use]
    pub fn to_dotted(&self) -> String {
        self.to_string()
    }
}

impl core::fmt::Display for EnhancedStatus {
    /// Writes the three fields separated by dots, e.g. `5.7.1`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let Self {
            class,
            subject,
            detail,
        } = *self;
        write!(f, "{class}.{subject}.{detail}")
    }
}
155
156/// Try to parse an [`EnhancedStatus`] from the start of a reply line's
157/// text portion.
158///
159/// The expected format is `"x.y.z"` followed by either end-of-string,
160/// whitespace, or any other non-digit-non-dot byte. Invalid prefixes
161/// — including missing dots, non-digit characters, or class digits
162/// other than `2`, `4`, `5` — return `None`. The caller advances
163/// past the parsed prefix only when this returns `Some`.
164///
165/// Returns `(status, prefix_len)` where `prefix_len` is the number of
166/// bytes consumed from `text`, including any single trailing
167/// whitespace octet. This lets [`Reply::joined_text`] strip the code
168/// before showing the user-facing message.
169fn parse_enhanced_status_prefix(text: &str) -> Option<(EnhancedStatus, usize)> {
170 // We require at least 5 chars (`x.y.z`) and a class digit in {2,4,5}.
171 let bytes = text.as_bytes();
172 if bytes.len() < 5 {
173 return None;
174 }
175 let class_byte = bytes[0];
176 if !matches!(class_byte, b'2' | b'4' | b'5') || bytes[1] != b'.' {
177 return None;
178 }
179
180 // subject: digits, terminated by '.'.
181 let mut i = 2;
182 let subj_start = i;
183 while i < bytes.len() && bytes[i].is_ascii_digit() {
184 i += 1;
185 }
186 if i == subj_start || i >= bytes.len() || bytes[i] != b'.' {
187 return None;
188 }
189 let subject: u16 = text[subj_start..i].parse().ok()?;
190 i += 1;
191
192 // detail: digits, terminated by whitespace or end of string.
193 let det_start = i;
194 while i < bytes.len() && bytes[i].is_ascii_digit() {
195 i += 1;
196 }
197 if i == det_start {
198 return None;
199 }
200 let detail: u16 = text[det_start..i].parse().ok()?;
201
202 // The terminator: end-of-string, single space, or single tab.
203 // We consume one whitespace byte so the user-facing message starts
204 // cleanly. Any other non-digit byte is allowed but not consumed.
205 let prefix_len = if i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
206 i + 1
207 } else {
208 i
209 };
210
211 Some((
212 EnhancedStatus {
213 class: class_byte - b'0',
214 subject,
215 detail,
216 },
217 prefix_len,
218 ))
219}
220
/// A complete SMTP reply, possibly assembled from multiple continuation
/// lines.
///
/// `code` and `lines` are public; the enhanced status code is private
/// and is read through [`Self::enhanced`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Reply {
    /// The three-digit reply code, shared by every line of the reply.
    pub code: u16,
    /// One entry per line, in the order received. Each entry is the line's
    /// text portion (after the code and separator) decoded as UTF-8 with
    /// invalid sequences replaced by `U+FFFD`. The text retains any
    /// enhanced status code prefix; use [`Self::message_text`] to obtain
    /// the same text with the prefix stripped, or [`Self::enhanced`] to
    /// obtain the parsed code itself.
    pub lines: Vec<String>,
    /// Parsed enhanced status code (RFC 3463), set only when the server
    /// has advertised `ENHANCEDSTATUSCODES` for this session. The code
    /// is taken from the first reply line; multi-line replies that
    /// disagree on the code are flagged at parse time, so this is well
    /// defined when present.
    enhanced: Option<EnhancedStatus>,
}
241
242impl Reply {
243 /// Construct a reply with the given code and lines, with no enhanced
244 /// status code attached. The client adds an enhanced code via the
245 /// internal `attach_enhanced_status` setter when the session has
246 /// `ENHANCEDSTATUSCODES` enabled.
247 #[must_use]
248 pub fn new(code: u16, lines: Vec<String>) -> Self {
249 Self {
250 code,
251 lines,
252 enhanced: None,
253 }
254 }
255
256 /// The leading digit of the reply code, useful for class-based checks.
257 pub fn class(&self) -> u8 {
258 u8::try_from(self.code / 100).unwrap_or(0)
259 }
260
261 /// Reply text concatenated with `\n`. Suitable for diagnostics.
262 /// If an enhanced status code prefix is present, it is preserved in
263 /// the output; use [`Self::message_text`] for a presentation that
264 /// hides it.
265 ///
266 /// # Caveat for log handlers
267 ///
268 /// The returned `String` may contain `\n` (used internally to
269 /// separate multi-line replies). It does **not** contain `\r` —
270 /// CRLF is stripped by the reply parser before storage — but
271 /// applications that forward this text to line-oriented loggers
272 /// (`syslog`, journald, structured JSON, etc.) should still
273 /// escape or render newlines explicitly to avoid log injection
274 /// where one logical reply renders as multiple log records. The
275 /// same caveat applies to anything else that consumes the
276 /// `Display` output of [`crate::ProtocolError`] or
277 /// [`crate::AuthError`], since those types embed reply text.
278 pub fn joined_text(&self) -> String {
279 self.lines.join("\n")
280 }
281
282 /// Reply text with any enhanced status code prefix stripped from
283 /// each line. Suitable for human-facing error messages where the
284 /// code is shown separately. Lines that have no enhanced prefix
285 /// are returned unchanged.
286 pub fn message_text(&self) -> String {
287 if self.enhanced.is_none() {
288 return self.joined_text();
289 }
290 let stripped: Vec<&str> = self
291 .lines
292 .iter()
293 .map(|line| match parse_enhanced_status_prefix(line) {
294 Some((_, prefix_len)) => &line[prefix_len..],
295 None => line.as_str(),
296 })
297 .collect();
298 stripped.join("\n")
299 }
300
301 /// Parsed enhanced status code, if the server has provided one and
302 /// the session has it enabled.
303 #[must_use]
304 pub fn enhanced(&self) -> Option<EnhancedStatus> {
305 self.enhanced
306 }
307
308 /// Set the enhanced status code on this reply. Used by the client
309 /// after the EHLO capability set has been confirmed to include
310 /// `ENHANCEDSTATUSCODES`.
311 pub(crate) fn attach_enhanced_status(&mut self, status: EnhancedStatus) {
312 self.enhanced = Some(status);
313 }
314
315 /// Iterate over the trimmed text of each line. Useful for parsing EHLO
316 /// capabilities, where the first line contains the greeting and the
317 /// remaining lines each name a single capability (e.g. `AUTH LOGIN`,
318 /// `PIPELINING`, `8BITMIME`).
319 pub fn iter_lines(&self) -> impl Iterator<Item = &str> {
320 self.lines.iter().map(String::as_str)
321 }
322
323 /// Parse an enhanced status code from the first line's text, if
324 /// present. Used by the client to populate `self.enhanced` only when
325 /// the session has `ENHANCEDSTATUSCODES` enabled.
326 #[must_use]
327 pub fn try_parse_enhanced(&self) -> Option<EnhancedStatus> {
328 self.lines
329 .first()
330 .and_then(|line| parse_enhanced_status_prefix(line).map(|(s, _)| s))
331 }
332}
333
334// -----------------------------------------------------------------------------
335// Command formatting
336// -----------------------------------------------------------------------------
337
/// Build the wire bytes for an argument-less command: the verb followed
/// by CRLF.
///
/// Example: `format_command("QUIT")` yields `b"QUIT\r\n"`.
///
/// The verb is copied verbatim; no validation or escaping is applied.
pub fn format_command(verb: &str) -> Vec<u8> {
    [verb.as_bytes(), &b"\r\n"[..]].concat()
}
347
/// Build the wire bytes for a command with one argument: verb, a single
/// space, the argument, then CRLF.
///
/// Example: `format_command_arg("EHLO", "client.example.com")` yields
/// `b"EHLO client.example.com\r\n"`.
///
/// Both parts are copied verbatim. Callers are responsible for argument
/// validation; this function does no escaping.
pub fn format_command_arg(verb: &str, arg: &str) -> Vec<u8> {
    format!("{verb} {arg}\r\n").into_bytes()
}
363
/// Build the wire bytes `MAIL FROM:<addr>\r\n`.
///
/// The address is inserted verbatim between the angle brackets, so the
/// caller must validate `addr` first.
pub fn format_mail_from(addr: &str) -> Vec<u8> {
    [&b"MAIL FROM:<"[..], addr.as_bytes(), &b">\r\n"[..]].concat()
}
372
/// Build the wire bytes `RCPT TO:<addr>\r\n`.
///
/// The address is inserted verbatim between the angle brackets, so the
/// caller must validate `addr` first.
pub fn format_rcpt_to(addr: &str) -> Vec<u8> {
    let mut line = String::with_capacity(addr.len() + 13);
    line.push_str("RCPT TO:<");
    line.push_str(addr);
    line.push_str(">\r\n");
    line.into_bytes()
}
381
382// -----------------------------------------------------------------------------
383// DATA payload
384// -----------------------------------------------------------------------------
385
/// Turn a user-supplied message body into the full DATA-phase byte
/// stream.
///
/// The result:
///
/// 1. has every line that begins with `.` prefixed with an extra `.`
///    (RFC 5321 §4.5.2 dot-stuffing), where a "line" starts at the very
///    beginning of the body or directly after a `\r\n` pair;
/// 2. always ends the body with `\r\n` (one is appended when the input
///    does not already end with it, which includes the empty body);
/// 3. finishes with the end-of-data terminator `.\r\n`.
///
/// The body is expected to be CRLF-normalized. Lone `\r` or `\n` bytes
/// are neither translated nor treated as line breaks; callers needing
/// such translation should preprocess the body.
///
/// No bytes other than `\r`, `\n`, and `.` are inspected, so the payload
/// may contain any 8-bit data the server is willing to accept (for
/// example, after a `250 8BITMIME` capability advertisement).
pub fn dot_stuff_and_terminate(body: &[u8]) -> Vec<u8> {
    let mut wire = Vec::with_capacity(body.len() + 8);

    // Walk the body one CRLF-terminated line at a time. `remaining`
    // always starts at the beginning of a line.
    let mut remaining = body;
    loop {
        if remaining.first() == Some(&b'.') {
            wire.push(b'.');
        }
        match remaining.windows(2).position(|pair| pair == b"\r\n") {
            Some(cr_at) => {
                let (line, tail) = remaining.split_at(cr_at + 2);
                wire.extend_from_slice(line);
                remaining = tail;
            }
            None => {
                // Final (possibly empty) fragment without a line ending.
                wire.extend_from_slice(remaining);
                break;
            }
        }
    }

    if !wire.ends_with(b"\r\n") {
        wire.extend_from_slice(b"\r\n");
    }
    wire.extend_from_slice(b".\r\n");
    wire
}
420
421// ---------------------------------------------------------------------------
422// Streaming dot-stuffer state machine
423// ---------------------------------------------------------------------------
424
/// Streaming version of the RFC 5321 dot-stuffer.
///
/// Where [`dot_stuff_and_terminate`] needs the whole body at once, this
/// type is fed the message one chunk at a time and returns the stuffed
/// bytes for each chunk, so only a chunk-sized buffer is alive at any
/// moment. Suitable for large messages and memory-constrained runtimes.
///
/// ## Usage
///
/// ```rust
/// use wasm_smtp::protocol::DotStufferState;
///
/// let mut stuffer = DotStufferState::new();
///
/// // Process each chunk.
/// let chunk1 = b"Subject: test\r\n\r\n";
/// let out1 = stuffer.process_chunk(chunk1);
///
/// let chunk2 = b".dotted line\r\nend\r\n";
/// let out2 = stuffer.process_chunk(chunk2);
///
/// // Produce the end-of-data terminator.
/// let terminator = stuffer.finish();
///
/// // on-wire: out1 + out2 + terminator
/// assert_eq!(&out2[..2], b".."); // dot-stuffed
/// assert_eq!(terminator, b".\r\n");
/// ```
#[derive(Debug, Clone)]
pub struct DotStufferState {
    /// True when the next byte to process is at the start of a line.
    at_line_start: bool,
    /// The most recent byte fed to `process_chunk` (0 before any input).
    /// Carries the pending `\r` across chunk boundaries and tells
    /// `finish()` whether a trailing `\r\n` must be added.
    prev: u8,
    /// The byte before `prev`. Together with `prev`, tells `finish()`
    /// whether the body already ended with `\r\n`.
    prev_prev: u8,
    /// True until the first call to `process_chunk` with non-empty input.
    empty: bool,
}

impl DotStufferState {
    /// Create a fresh state machine positioned at the start of the first
    /// line, with no input seen yet.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            empty: true,
            prev_prev: 0,
            prev: 0,
            at_line_start: true,
        }
    }

    /// Dot-stuff one chunk and return the processed bytes.
    ///
    /// The output equals the input except that an extra `.` is inserted
    /// before every `.` that sits at the start of a line. An empty chunk
    /// is a no-op: it returns an empty `Vec` and leaves the state
    /// untouched.
    ///
    /// # Cross-chunk dot-stuffing
    ///
    /// Line-start tracking survives chunk boundaries. If chunk N ends
    /// with `\r\n` and chunk N+1 starts with `.`, that dot is stuffed;
    /// the same holds when the `\r` and the `\n` arrive in different
    /// chunks, because the previous byte is remembered between calls.
    pub fn process_chunk(&mut self, chunk: &[u8]) -> Vec<u8> {
        if chunk.is_empty() {
            return Vec::new();
        }

        // A few bytes of headroom cover the common case of a handful of
        // stuffed dots; the Vec simply grows if a chunk needs more.
        let mut stuffed = Vec::with_capacity(chunk.len() + 4);
        for &byte in chunk {
            if byte == b'.' && self.at_line_start {
                stuffed.push(b'.');
            }
            stuffed.push(byte);

            // A line starts right after a CR LF pair and nowhere else.
            self.at_line_start = byte == b'\n' && self.prev == b'\r';
            self.prev_prev = self.prev;
            self.prev = byte;
        }
        self.empty = false;
        stuffed
    }

    /// Consume the state machine and produce the end-of-DATA bytes.
    ///
    /// The output is:
    ///
    /// - `.\r\n` if the body already ended with `\r\n`;
    /// - `\r\n.\r\n` otherwise, including when no bytes were ever fed.
    ///
    /// Appending this to the concatenated `process_chunk` outputs gives
    /// the same stream [`dot_stuff_and_terminate`] produces for the
    /// whole body.
    #[must_use]
    pub fn finish(self) -> Vec<u8> {
        let body_ends_with_crlf =
            !self.empty && (self.prev_prev, self.prev) == (b'\r', b'\n');
        if body_ends_with_crlf {
            b".\r\n".to_vec()
        } else {
            b"\r\n.\r\n".to_vec()
        }
    }
}

impl Default for DotStufferState {
    /// Equivalent to [`DotStufferState::new`].
    fn default() -> Self {
        Self::new()
    }
}
539
540// -----------------------------------------------------------------------------
541// Base64
542// -----------------------------------------------------------------------------
543
/// The standard base64 alphabet (RFC 4648 §4), indexed by 6-bit value.
const BASE64_ALPHABET: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Standard base64 encoding (RFC 4648), padded with `=`.
///
/// Used for `AUTH LOGIN`. We deliberately avoid pulling in an external
/// base64 dependency; the implementation is small and easy to audit.
///
/// Every group of up to three input bytes is packed into the top 24
/// bits of a word and emitted as four output characters, with `=`
/// standing in for the characters a short final group cannot fill.
pub fn base64_encode(input: &[u8]) -> String {
    let mut encoded = String::with_capacity(input.len().div_ceil(3) * 4);
    for group in input.chunks(3) {
        match *group {
            [a, b, c] => {
                let word = (u32::from(a) << 16) | (u32::from(b) << 8) | u32::from(c);
                push_b64(&mut encoded, word, 4);
            }
            [a, b] => {
                let word = (u32::from(a) << 16) | (u32::from(b) << 8);
                push_b64(&mut encoded, word, 3);
                encoded.push('=');
            }
            [a] => {
                let word = u32::from(a) << 16;
                push_b64(&mut encoded, word, 2);
                encoded.push_str("==");
            }
            _ => unreachable!(),
        }
    }
    encoded
}

/// Append the first `count` base64 characters of the 24-bit word `n`.
///
/// `count` is the number of significant characters to emit (2..=4).
/// Character `i` is taken from the 6-bit field at shift `18 - 6 * i`,
/// i.e. 18, 12, 6, 0 from most to least significant.
fn push_b64(out: &mut String, n: u32, count: u8) {
    out.extend((0..count).map(|i| {
        let shift = 18 - 6 * u32::from(i);
        let sextet = ((n >> shift) & 0x3F) as usize;
        char::from(BASE64_ALPHABET[sextet])
    }));
}
585
/// Standard base64 decoding (RFC 4648), padded with `=`.
///
/// The symmetric counterpart of [`base64_encode`]. Used for SCRAM
/// `server-first` and `server-final` decoding.
///
/// The empty string decodes to an empty `Vec`. Otherwise the input
/// length must be a multiple of 4 and every character must come from
/// the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`). Padding is
/// accepted only as one or two `=` at the very end of the input; an `=`
/// anywhere else, or more than two of them, is rejected. Non-zero
/// "left-over" bits in the final character before the padding are
/// tolerated and ignored.
///
/// # Errors
///
/// Returns the static string `"invalid base64"` on any decode
/// failure. The caller is expected to wrap this in a
/// domain-appropriate error type.
pub fn base64_decode(input: &str) -> Result<Vec<u8>, &'static str> {
    const INVALID: &str = "invalid base64";

    /// Map one alphabet character to its 6-bit value. `=` is
    /// deliberately not accepted here: padding is stripped up front.
    fn sextet(c: u8) -> Option<u8> {
        match c {
            b'A'..=b'Z' => Some(c - b'A'),
            b'a'..=b'z' => Some(c - b'a' + 26),
            b'0'..=b'9' => Some(c - b'0' + 52),
            b'+' => Some(62),
            b'/' => Some(63),
            _ => None,
        }
    }

    let bytes = input.as_bytes();
    if bytes.len() % 4 != 0 {
        return Err(INVALID);
    }

    // Only trailing padding is legal, and a quantum carries at most two
    // pad characters. Any `=` left in `data` fails the alphabet lookup.
    let pad = bytes.iter().rev().take_while(|&&c| c == b'=').count();
    if pad > 2 {
        return Err(INVALID);
    }
    let data = &bytes[..bytes.len() - pad];

    let mut out = Vec::with_capacity(bytes.len() / 4 * 3);
    // Because the total length is a multiple of 4 and `pad <= 2`, every
    // chunk holds 4 characters except possibly the last, which holds 3
    // (one pad) or 2 (two pads).
    for quad in data.chunks(4) {
        let mut word = 0u32;
        for &c in quad {
            word = (word << 6) | u32::from(sextet(c).ok_or(INVALID)?);
        }
        // Left-align a short final group inside the 24-bit word.
        word <<= 6 * (4 - quad.len());
        let [_, b0, b1, b2] = word.to_be_bytes();
        // n characters carry n - 1 complete bytes.
        out.extend_from_slice(&[b0, b1, b2][..quad.len() - 1]);
    }
    Ok(out)
}
646
647// -----------------------------------------------------------------------------
648// Input validation
649// -----------------------------------------------------------------------------
650
651/// Validate a mail address (RFC 5321 reverse-path / forward-path content).
652///
653/// The check is intentionally conservative: it rejects the characters that
654/// would either inject SMTP commands or violate the framing of `<addr>`.
655/// Validate an envelope address (used in MAIL FROM / RCPT TO) against
656/// RFC 5321 grammar and the length limits in §4.5.3.1.
657///
658/// The check is conservative — it does not parse RFC 5321 grammar in
659/// detail, but it forbids any byte that would corrupt the command
660/// framing, and rejects values that exceed the standard's per-field
661/// length limits.
662///
663/// In particular:
664///
665/// - non-empty;
666/// - ASCII only — UTF-8 addresses require the `smtputf8` feature
667/// (which exposes a separate UTF-8-permissive validator);
668/// - no `\r`, `\n`, or `\0`;
669/// - no `<`, `>`, or space (which would corrupt the angle-bracket framing);
670/// - the whole address (local-part + `@` + domain) must be no longer
671/// than 254 octets — RFC 5321 §4.5.3.1.3 specifies 256 for the
672/// `Path` token including angle brackets, leaving 254 for the
673/// bracket-stripped address;
674/// - if an `@` is present, the local-part is no longer than 64 octets
675/// and the domain is no longer than 255 octets (§4.5.3.1.1 /
676/// §4.5.3.1.2). These limits are advisory: many real-world relays
677/// accept longer values, but rejecting at the client boundary
678/// prevents a misformed input from generating a wire `MAIL FROM`
679/// line that exceeds the SMTP line-length limit (§4.5.3.1.5).
680pub fn validate_address(addr: &str) -> Result<(), InvalidInputError> {
681 if addr.is_empty() {
682 return Err(InvalidInputError::new("mail address must not be empty"));
683 }
684 if !addr.is_ascii() {
685 return Err(InvalidInputError::new(
686 "mail address must be ASCII (SMTPUTF8 is not supported)",
687 ));
688 }
689 if addr.len() > MAX_ADDRESS_LEN {
690 return Err(InvalidInputError::new(
691 "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
692 ));
693 }
694 if let Some(at_pos) = addr.rfind('@') {
695 let (local, domain) = addr.split_at(at_pos);
696 // domain still has the leading '@' — strip it.
697 let domain = &domain[1..];
698 if local.len() > MAX_LOCAL_PART_LEN {
699 return Err(InvalidInputError::new(
700 "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
701 ));
702 }
703 if domain.len() > MAX_DOMAIN_LEN {
704 return Err(InvalidInputError::new(
705 "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
706 ));
707 }
708 }
709 for b in addr.bytes() {
710 match b {
711 b'\r' | b'\n' => {
712 return Err(InvalidInputError::new(
713 "mail address must not contain CR or LF",
714 ));
715 }
716 0 => {
717 return Err(InvalidInputError::new(
718 "mail address must not contain a NUL byte",
719 ));
720 }
721 b'<' | b'>' => {
722 return Err(InvalidInputError::new(
723 "mail address must not contain '<' or '>'",
724 ));
725 }
726 b' ' | b'\t' => {
727 return Err(InvalidInputError::new(
728 "mail address must not contain whitespace",
729 ));
730 }
731 _ => {}
732 }
733 }
734 Ok(())
735}
736
737/// Validate the domain argument supplied to `EHLO`.
738///
739/// Accepts any non-empty sequence of printable ASCII (0x21..=0x7E). Address
740/// literals (e.g. `[192.0.2.1]`) and dotted FQDNs both pass. The check is
741/// intentionally lenient: its job is to prevent CRLF injection, not to
742/// enforce DNS syntax.
743pub fn validate_ehlo_domain(domain: &str) -> Result<(), InvalidInputError> {
744 if domain.is_empty() {
745 return Err(InvalidInputError::new("EHLO domain must not be empty"));
746 }
747 if !domain.is_ascii() {
748 return Err(InvalidInputError::new("EHLO domain must be ASCII"));
749 }
750 if domain.bytes().any(|b| !(0x21..=0x7E).contains(&b)) {
751 return Err(InvalidInputError::new(
752 "EHLO domain must contain only printable ASCII characters",
753 ));
754 }
755 Ok(())
756}
757
/// Validate the username supplied to `AUTH LOGIN`.
///
/// As of v0.5.0 this is a thin alias for [`validate_plain_username`]:
/// the two SASL mechanisms (PLAIN and LOGIN) accept the same shape
/// of credential string and the same constraints apply. NUL bytes
/// are rejected because they would corrupt the SASL framing on the
/// post-base64 server side.
///
/// The function is retained for source compatibility with v0.4.x
/// callers, but new code should use [`validate_plain_username`]
/// directly. A future major release may remove this alias.
///
/// # Errors
///
/// Returns whatever error [`validate_plain_username`] returns for the
/// same input; this function adds no checks of its own.
pub fn validate_login_username(user: &str) -> Result<(), InvalidInputError> {
    validate_plain_username(user)
}
772
/// Validate the password supplied to `AUTH LOGIN`.
///
/// As of v0.5.0 this is a thin alias for [`validate_plain_password`].
/// See [`validate_login_username`] for the rationale.
///
/// # Errors
///
/// Returns whatever error [`validate_plain_password`] returns for the
/// same input; this function adds no checks of its own.
pub fn validate_login_password(pass: &str) -> Result<(), InvalidInputError> {
    validate_plain_password(pass)
}
780
781// -----------------------------------------------------------------------------
782// EHLO capability inspection
783// -----------------------------------------------------------------------------
784
/// Report whether the EHLO capability lines advertise an `AUTH`
/// mechanism named `mechanism`. Both the `AUTH` keyword and the
/// mechanism name are compared case-insensitively (ASCII).
///
/// `capability_lines` is the slice of lines that follows the greeting in
/// an `EHLO` reply: each line is one extension (e.g. `"AUTH LOGIN PLAIN"`,
/// `"PIPELINING"`, `"8BITMIME"`). A line counts only when its first
/// whitespace-separated token is exactly `AUTH`; the remaining tokens on
/// that line are the advertised mechanisms. Every `AUTH` line is
/// consulted, not just the first one.
pub fn ehlo_advertises_auth<S: AsRef<str>>(capability_lines: &[S], mechanism: &str) -> bool {
    capability_lines.iter().any(|line| {
        let mut tokens = line.as_ref().split_ascii_whitespace();
        let is_auth_line = tokens
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("AUTH"));
        is_auth_line && tokens.any(|offered| offered.eq_ignore_ascii_case(mechanism))
    })
}
807
/// Report whether the EHLO capability lines advertise the `STARTTLS`
/// extension (RFC 3207). The keyword is compared case-insensitively
/// (ASCII).
///
/// `capability_lines` is the slice of lines that follows the greeting in
/// an `EHLO` reply; each line is one extension keyword optionally
/// followed by parameters. Only the first whitespace-separated token of
/// each line is examined, and it must match the keyword in full.
pub fn ehlo_advertises_starttls<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines
        .iter()
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("STARTTLS"))
}
824
/// Report whether the EHLO capability lines advertise the
/// `ENHANCEDSTATUSCODES` extension (RFC 2034). The keyword is compared
/// case-insensitively (ASCII), and only the first whitespace-separated
/// token of each line is examined.
///
/// When this is `true` for a session, the SMTP client parses the
/// `class.subject.detail` prefix off each reply and exposes it as
/// [`EnhancedStatus`] both on the [`Reply`] itself and on
/// [`crate::ProtocolError::UnexpectedCode`]. When the keyword is not
/// advertised, the same byte sequence in a reply (a stray "5.1.1"
/// for instance) is left as-is in the message text and not parsed.
pub fn ehlo_advertises_enhanced_status_codes<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines
        .iter()
        .filter_map(|line| line.as_ref().split_ascii_whitespace().next())
        .any(|keyword| keyword.eq_ignore_ascii_case("ENHANCEDSTATUSCODES"))
}
845
846// -----------------------------------------------------------------------------
847// Authentication mechanisms
848// -----------------------------------------------------------------------------
849
/// SASL authentication mechanisms known to this client.
///
/// The enum currently lists `PLAIN` (RFC 4616), `LOGIN` (the historical
/// mechanism used by many submission servers), the two OAuth 2.0
/// bearer-token mechanisms `XOAUTH2` and `OAUTHBEARER`, and
/// `SCRAM-SHA-256`. It is `non_exhaustive` so that further additions do
/// not require a major version bump.
///
/// Between the two password-in-the-clear mechanisms, `PLAIN` is
/// preferred when both are advertised: it is one network round-trip
/// rather than two, and is an IETF-standard SASL mechanism. `LOGIN` is
/// retained for compatibility with older submission servers that
/// advertise only it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum AuthMechanism {
    /// SASL `PLAIN` (RFC 4616). Sends `\0user\0pass` base64-encoded as
    /// the initial response, completing in a single round-trip.
    Plain,
    /// `LOGIN`. Sends username and password as separate base64 lines
    /// in response to two `334` server prompts.
    Login,
    /// SASL `XOAUTH2` (Google / Microsoft OAuth 2.0 SMTP extension).
    /// Sends `user={user}\x01auth=Bearer {token}\x01\x01`
    /// base64-encoded as the initial response. The "credential" passed
    /// to `login_with` for this mechanism is an OAuth 2.0 access
    /// token, not a static password — auto-selection by `login()`
    /// deliberately does NOT pick this mechanism for that reason.
    XOAuth2,
    /// SASL `OAUTHBEARER` (RFC 7628). The IETF-standard OAuth 2.0 SASL
    /// mechanism. Sends `n,a={user},\x01auth=Bearer {token}\x01\x01`
    /// (GS2 header + Bearer token) base64-encoded as the initial
    /// response. More interoperable than `XOAUTH2` and defined by an
    /// IETF RFC. Like `XOAUTH2`, auto-selection by `login()` does NOT
    /// pick this mechanism because the credential is a token, not a
    /// password.
    ///
    /// Usable only when the crate is built with the `oauthbearer` cargo
    /// feature (default-on); the variant itself is always present in
    /// the enum.
    OAuthBearer,
    /// SASL `SCRAM-SHA-256` (RFC 5802 / RFC 7677). Challenge-response
    /// authentication: the client never transmits the password, and
    /// the server proves possession of the salted hash through a
    /// signature step. Auto-selection by `login()` prefers this
    /// mechanism over `PLAIN` and `LOGIN` when the server advertises
    /// it.
    ///
    /// Usable only when the crate is built with the `scram-sha-256`
    /// cargo feature (default-on); the variant itself is always present
    /// in the enum.
    ScramSha256,
}

impl AuthMechanism {
    /// The keyword that identifies this mechanism on the SMTP wire,
    /// e.g. in an `AUTH` capability line or an `AUTH <name>` command.
    #[must_use]
    pub const fn name(self) -> &'static str {
        match self {
            AuthMechanism::Plain => "PLAIN",
            AuthMechanism::Login => "LOGIN",
            AuthMechanism::XOAuth2 => "XOAUTH2",
            AuthMechanism::OAuthBearer => "OAUTHBEARER",
            AuthMechanism::ScramSha256 => "SCRAM-SHA-256",
        }
    }
}

impl core::fmt::Display for AuthMechanism {
    /// Writes the wire keyword returned by [`AuthMechanism::name`].
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let keyword: &'static str = self.name();
        f.write_str(keyword)
    }
}
918
919/// Pick the best mechanism advertised by the server, preferring
920/// `SCRAM-SHA-256` over `PLAIN` over `LOGIN`. Returns `None` if the
921/// server advertised none of these.
922///
923/// Use this when you want a single `login` call to do the right thing
924/// across the variety of submission servers in deployment. If you need
925/// to lock in a specific mechanism (for example, to reproduce a
926/// production failure in a test), call [`crate::client::SmtpClient::login_with`]
927/// directly.
928///
929/// `SCRAM-SHA-256` is the modern default: it does not transmit the
930/// password in plaintext and is supported by all current submission
931/// servers (Postfix + Dovecot SASL, Exchange, Stalwart). `PLAIN` is
932/// the universal fallback. `LOGIN` is retained only for very old
933/// servers.
934///
935/// Note: when the `scram-sha-256` feature is disabled, the function
936/// behaves as if SCRAM were not in the picture and falls through to
937/// the PLAIN/LOGIN preference.
938pub fn select_auth_mechanism<S: AsRef<str>>(capability_lines: &[S]) -> Option<AuthMechanism> {
939 #[cfg(feature = "scram-sha-256")]
940 if ehlo_advertises_auth(capability_lines, "SCRAM-SHA-256") {
941 return Some(AuthMechanism::ScramSha256);
942 }
943
944 if ehlo_advertises_auth(capability_lines, "PLAIN") {
945 Some(AuthMechanism::Plain)
946 } else if ehlo_advertises_auth(capability_lines, "LOGIN") {
947 Some(AuthMechanism::Login)
948 } else {
949 None
950 }
951}
952
953/// Build the SASL `PLAIN` initial response for the given credentials.
954///
955/// The result is the base64 encoding of `\0user\0pass` (RFC 4616 §2).
956/// The empty authorization identity (the part before the first NUL)
957/// means "act as the authenticated user", which is the correct default
958/// for SMTP submission.
959///
960/// The caller is responsible for the surrounding command framing; the
961/// full on-wire bytes are `b"AUTH PLAIN " + result + b"\r\n"`.
962///
963/// # Encoding
964///
965/// `user` and `pass` are encoded as their UTF-8 bytes. RFC 4616 mandates
966/// UTF-8 for both fields; this matches Rust's `String` representation.
967#[must_use]
968pub fn build_auth_plain_initial_response(user: &str, pass: &str) -> String {
969 let mut payload = Vec::with_capacity(2 + user.len() + pass.len());
970 payload.push(0u8); // empty authzid
971 payload.extend_from_slice(user.as_bytes());
972 payload.push(0u8);
973 payload.extend_from_slice(pass.as_bytes());
974 base64_encode(&payload)
975}
976
977/// Validate the username supplied to a SASL `PLAIN` `AUTH` exchange.
978///
979/// RFC 4616 forbids NUL bytes in the authcid (NUL is the field
980/// separator). Empty usernames are also refused: while RFC 4616 itself
981/// allows them, no SMTP submission server accepts an empty login, and
982/// rejecting them up-front turns a server-side failure into a
983/// programmer-visible one.
984pub fn validate_plain_username(user: &str) -> Result<(), InvalidInputError> {
985 if user.is_empty() {
986 return Err(InvalidInputError::new("AUTH username must not be empty"));
987 }
988 if user.bytes().any(|b| b == 0) {
989 return Err(InvalidInputError::new(
990 "AUTH username must not contain a NUL byte",
991 ));
992 }
993 Ok(())
994}
995
996/// Validate the password supplied to a SASL `PLAIN` `AUTH` exchange.
997///
998/// As with [`validate_plain_username`], NUL bytes are forbidden because
999/// they would corrupt the SASL framing.
1000pub fn validate_plain_password(pass: &str) -> Result<(), InvalidInputError> {
1001 if pass.is_empty() {
1002 return Err(InvalidInputError::new("AUTH password must not be empty"));
1003 }
1004 if pass.bytes().any(|b| b == 0) {
1005 return Err(InvalidInputError::new(
1006 "AUTH password must not contain a NUL byte",
1007 ));
1008 }
1009 Ok(())
1010}
1011
1012// -----------------------------------------------------------------------------
1013// XOAUTH2 (Google / Microsoft OAuth 2.0 SASL profile)
1014//
1015// The three helpers in this section are feature-gated behind
1016// `xoauth2` (default-on). Disabling the feature removes them
1017// entirely along with the corresponding `SmtpClient::login_xoauth2`
1018// method and the `XOAuth2` arm of `login_with`. The
1019// `AuthMechanism::XOAuth2` and `SmtpOp::AuthXOAuth2` enum variants
1020// remain present in either configuration; both enums are
1021// `non_exhaustive` and the variants without the feature are simply
1022// unreachable through the public API.
1023// -----------------------------------------------------------------------------
1024
/// Build the SASL `XOAUTH2` initial response.
///
/// The wire format, before base64, is:
///
/// ```text
/// user={user}\x01auth=Bearer {token}\x01\x01
/// ```
///
/// where `\x01` is the SOH (Ctrl-A) byte that separates fields. The
/// `Bearer ` prefix is fixed and case-sensitive. Both the user and the
/// token are passed through verbatim; the caller must have validated
/// them with [`validate_xoauth2_user`] and [`validate_oauth2_token`]
/// first.
///
/// The returned string is the base64 encoding of the entire payload,
/// suitable for placement after `AUTH XOAUTH2 ` on the wire. The
/// caller is responsible for the surrounding command framing.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
#[cfg(feature = "xoauth2")]
#[must_use]
pub fn build_xoauth2_initial_response(user: &str, token: &str) -> String {
    const USER_KEY: &[u8] = b"user=";
    const AUTH_KEY: &[u8] = b"\x01auth=Bearer ";
    const TERMINATOR: &[u8] = b"\x01\x01";

    // Derive the capacity from the literals themselves so the buffer is
    // sized exactly (20 fixed bytes plus the two variable fields) and a
    // hand-counted constant cannot drift out of sync with the format.
    let mut payload = Vec::with_capacity(
        USER_KEY.len() + user.len() + AUTH_KEY.len() + token.len() + TERMINATOR.len(),
    );
    payload.extend_from_slice(USER_KEY);
    payload.extend_from_slice(user.as_bytes());
    payload.extend_from_slice(AUTH_KEY);
    payload.extend_from_slice(token.as_bytes());
    payload.extend_from_slice(TERMINATOR);
    base64_encode(&payload)
}
1059
/// Check the user field before it is placed in a SASL `XOAUTH2` payload.
///
/// The user is inserted verbatim into the payload, so any byte that
/// could break its framing is refused: NUL, CR, LF, and SOH (`0x01`,
/// the XOAUTH2 field separator). An empty user is refused as well.
///
/// # Errors
///
/// Returns [`InvalidInputError`] describing which rule was violated.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
#[cfg(feature = "xoauth2")]
pub fn validate_xoauth2_user(user: &str) -> Result<(), InvalidInputError> {
    // NUL, CR, LF, SOH.
    const FORBIDDEN: [u8; 4] = [0x00, b'\r', b'\n', 0x01];

    if user.is_empty() {
        return Err(InvalidInputError::new("XOAUTH2 user must not be empty"));
    }
    let has_forbidden_byte = user.bytes().any(|byte| FORBIDDEN.contains(&byte));
    if has_forbidden_byte {
        Err(InvalidInputError::new(
            "XOAUTH2 user must not contain NUL, CR, LF, or SOH",
        ))
    } else {
        Ok(())
    }
}
1080
/// Check an OAuth 2.0 access token before it is sent on the wire.
///
/// The token must be non-empty and every byte must be a visible ASCII
/// character, i.e. in `0x21..=0x7E`. That rules out space, tab, all
/// control bytes (including the SOH separator used by XOAUTH2) and any
/// non-ASCII byte, while still admitting the `b64token` alphabet
/// `[A-Za-z0-9._~+/=-]` described by RFC 6750 §2.1.
///
/// # Errors
///
/// Returns [`InvalidInputError`] if the token is empty or contains a
/// byte outside `0x21..=0x7E`.
///
/// Available only with the `xoauth2` cargo feature enabled (default-on).
#[cfg(feature = "xoauth2")]
pub fn validate_oauth2_token(token: &str) -> Result<(), InvalidInputError> {
    if token.is_empty() {
        return Err(InvalidInputError::new(
            "OAuth2 access token must not be empty",
        ));
    }
    // `is_ascii_graphic` is exactly 0x21..=0x7E: printable ASCII
    // excluding space.
    if token.bytes().all(|byte| byte.is_ascii_graphic()) {
        Ok(())
    } else {
        Err(InvalidInputError::new(
            "OAuth2 access token must contain only printable ASCII (no whitespace or control bytes)",
        ))
    }
}
1114
1115// -----------------------------------------------------------------------------
1116// OAUTHBEARER (RFC 7628) — feature-gated
1117// -----------------------------------------------------------------------------
1118
/// Build the base64-encoded initial response for `AUTH OAUTHBEARER` (RFC 7628).
///
/// The payload, before base64, is:
///
/// ```text
/// n,a={user},\x01auth=Bearer {token}\x01\x01
/// ```
///
/// - `n` — GS2 header flag: no channel binding.
/// - `a={user}` — authorization identity (authzid).
/// - `\x01auth=Bearer {token}\x01\x01` — SOH-separated key=value
///   attributes, closed by a double SOH.
///
/// The difference from `XOAUTH2`:
/// - `XOAUTH2` (Google proprietary): `user={email}\x01auth=Bearer {token}\x01\x01`
/// - `OAUTHBEARER` (RFC 7628): `n,a={email},\x01auth=Bearer {token}\x01\x01`
///
/// Both arguments are inserted verbatim; validating them is the
/// caller's responsibility. The caller also supplies the surrounding
/// command framing.
///
/// Available only with the `oauthbearer` cargo feature (default-on).
#[cfg(feature = "oauthbearer")]
#[must_use]
pub fn build_oauthbearer_initial_response(user: &str, token: &str) -> String {
    const GS2_PREFIX: &[u8] = b"n,a=";
    const AUTH_KEY: &[u8] = b",\x01auth=Bearer ";
    const TERMINATOR: &[u8] = b"\x01\x01";

    // NOTE(review): RFC 5801 requires `,` and `=` inside a GS2 authzid to
    // be written as `=2C` / `=3D`. `user` is copied as-is here — confirm
    // that callers reject or escape those characters beforehand.

    // Derive the capacity from the literals (20 fixed bytes plus the two
    // variable fields) so the buffer never needs to grow and the count
    // cannot drift out of sync with the format.
    let mut payload = Vec::with_capacity(
        GS2_PREFIX.len() + user.len() + AUTH_KEY.len() + token.len() + TERMINATOR.len(),
    );
    payload.extend_from_slice(GS2_PREFIX);
    payload.extend_from_slice(user.as_bytes());
    payload.extend_from_slice(AUTH_KEY);
    payload.extend_from_slice(token.as_bytes());
    payload.extend_from_slice(TERMINATOR);
    base64_encode(&payload)
}
1148
1149// -----------------------------------------------------------------------------
1150// PIPELINING (RFC 2920) — feature-gated
1151// -----------------------------------------------------------------------------
1152
/// Return `true` if the EHLO capability lines advertise `PIPELINING` (RFC 2920).
///
/// Like the other capability checks in this module, only the first
/// whitespace-delimited token of each line is compared, ASCII
/// case-insensitively. A line such as `"PIPELINING "` (trailing
/// whitespace) is therefore still recognised, while a line where the
/// word appears only as a later token is not.
///
/// When pipelining is available, `send_mail` batches `MAIL FROM`, all
/// `RCPT TO` commands, and `DATA` into a single write, reducing the
/// number of network round-trips.
#[cfg(feature = "pipelining")]
#[must_use]
pub fn ehlo_advertises_pipelining(caps: &[String]) -> bool {
    caps.iter().any(|line| {
        line.split_ascii_whitespace()
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("PIPELINING"))
    })
}
1163
1164// -----------------------------------------------------------------------------
1165// SMTPUTF8 (RFC 6531) — feature-gated
1166// -----------------------------------------------------------------------------
1167//
1168// SMTPUTF8 lets a session carry mail addresses outside the ASCII
1169// repertoire — e.g. `送信者@例え.jp`. The crate gates the related
1170// helpers behind the `smtputf8` cargo feature: callers who only ever
1171// submit ASCII addresses pay no code-size cost for the UTF-8 validator,
1172// the `MAIL FROM ... SMTPUTF8` formatter, or the capability check.
1173//
1174// When the feature is disabled, none of the items below exist; the
1175// default `validate_address` and `format_mail_from` continue to enforce
1176// ASCII, as they always have.
1177
/// Report whether an `EHLO` reply advertises the `SMTPUTF8` extension
/// (RFC 6531).
///
/// `capability_lines` is the slice of lines that follows the greeting
/// in an `EHLO` reply. A line counts as a match when its first
/// whitespace-delimited token equals `SMTPUTF8`, compared ASCII
/// case-insensitively; anything after that token is ignored.
#[cfg(feature = "smtputf8")]
pub fn ehlo_advertises_smtputf8<S: AsRef<str>>(capability_lines: &[S]) -> bool {
    capability_lines.iter().any(|entry| {
        entry
            .as_ref()
            .split_ascii_whitespace()
            .next()
            .is_some_and(|keyword| keyword.eq_ignore_ascii_case("SMTPUTF8"))
    })
}
1194
/// Validate an envelope address that may contain non-ASCII (UTF-8)
/// characters, as a counterpart to the ASCII-only [`validate_address`].
///
/// Checks are applied in this order, and the first failure is reported:
///
/// 1. The address must not be empty.
/// 2. The whole address must not exceed [`MAX_ADDRESS_LEN`] octets.
/// 3. If the address contains `@`, it is split at the *last* `@`; the
///    local part must not exceed `MAX_LOCAL_PART_LEN` octets and the
///    domain must not exceed `MAX_DOMAIN_LEN` octets.
/// 4. No character may be CR, LF, NUL, `<`, `>`, space, tab, any other
///    ASCII control character (C0 or DEL), or a C1 control character
///    (U+0080-U+009F).
///
/// Every other Unicode codepoint is accepted. In particular, only the
/// ASCII space and tab are treated as whitespace — they would corrupt
/// SMTP command framing — while other Unicode whitespace such as
/// U+3000 IDEOGRAPHIC SPACE passes. The dot-atom structure is not
/// checked here and is left for the server to validate.
///
/// # Errors
///
/// Returns [`InvalidInputError`] describing the first rule violated.
#[cfg(feature = "smtputf8")]
pub fn validate_address_utf8(addr: &str) -> Result<(), InvalidInputError> {
    if addr.is_empty() {
        return Err(InvalidInputError::new("mail address must not be empty"));
    }

    // All limits are measured in octets, not characters: `str::len` is
    // the UTF-8 encoded length, which is what travels on the wire.
    if addr.len() > MAX_ADDRESS_LEN {
        return Err(InvalidInputError::new(
            "mail address exceeds RFC 5321 §4.5.3.1.3 length limit (254 octets)",
        ));
    }
    // Split on the last `@`; an address without one skips these checks.
    if let Some((local, domain)) = addr.rsplit_once('@') {
        if local.len() > MAX_LOCAL_PART_LEN {
            return Err(InvalidInputError::new(
                "mail address local-part exceeds RFC 5321 §4.5.3.1.1 length limit (64 octets)",
            ));
        }
        if domain.len() > MAX_DOMAIN_LEN {
            return Err(InvalidInputError::new(
                "mail address domain exceeds RFC 5321 §4.5.3.1.2 length limit (255 octets)",
            ));
        }
    }

    // Find the first offending character, if any, and the reason it is
    // refused. Arm order matters: the specific cases come before the
    // generic control-character arms so they get their own message.
    let violation = addr.chars().find_map(|ch| match ch {
        '\r' | '\n' => Some("mail address must not contain CR or LF"),
        '\0' => Some("mail address must not contain a NUL byte"),
        '<' | '>' => Some("mail address must not contain ASCII < or >"),
        ' ' | '\t' => Some("mail address must not contain ASCII whitespace"),
        // Remaining C0 controls and DEL.
        c if c.is_ascii_control() => {
            Some("mail address must not contain ASCII control characters")
        }
        // C1 control characters.
        '\u{80}'..='\u{9F}' => Some("mail address must not contain C1 control characters"),
        _ => None,
    });

    match violation {
        Some(reason) => Err(InvalidInputError::new(reason)),
        None => Ok(()),
    }
}
1278
/// Produce the bytes of `MAIL FROM:<addr> SMTPUTF8\r\n`.
///
/// The trailing `SMTPUTF8` ESMTP parameter (RFC 6531 §3.4) tells the
/// server that the envelope and message that follow contain UTF-8. A
/// server that did not advertise the extension will reject the
/// command, so callers should check [`ehlo_advertises_smtputf8`]
/// before using this helper.
///
/// `addr` is copied between the angle brackets exactly as given. No
/// validation happens here; run [`validate_address_utf8`] first.
#[cfg(feature = "smtputf8")]
#[must_use]
pub fn format_mail_from_smtputf8(addr: &str) -> Vec<u8> {
    const OPENING: &[u8] = b"MAIL FROM:<";
    const CLOSING: &[u8] = b"> SMTPUTF8\r\n";
    // `concat` sizes the output from the pieces, so no manual length
    // arithmetic is needed.
    [OPENING, addr.as_bytes(), CLOSING].concat()
}