Skip to main content

mailrs_mail_builder/
encode.rs

1//! Encoding helpers: CTE selection, quoted-printable, base64,
2//! header folding, encoded-word header escapes.
3
4use base64::Engine;
5
/// The set of MIME `Content-Transfer-Encoding` values the builder can
/// label a body with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// `7bit`: the body is plain ASCII, contains no NUL bytes and keeps
    /// every line within the 998-character limit of RFC 5322 §2.1.1.
    /// The body is written out unchanged.
    SevenBit,
    /// `8bit`: the body is text-shaped with short lines but carries at
    /// least one byte above 0x7F. The body is written out unchanged,
    /// which requires the receiving MTA to support 8BITMIME; callers
    /// wanting maximum compatibility can use `QuotedPrintable` instead.
    EightBit,
    /// `quoted-printable`: the body is text-shaped but has high-bit
    /// bytes or over-long lines. The encoded output is wrapped at 76
    /// characters per RFC 2045 §6.7.
    QuotedPrintable,
    /// `base64`: the body looks binary (an embedded NUL or a high
    /// density of non-printable bytes). The encoded output is wrapped
    /// at 76 characters per RFC 2045 §6.8.
    Base64,
}

impl ContentTransferEncoding {
    /// Returns the token to place in the value of the
    /// `Content-Transfer-Encoding:` header for this variant.
    pub fn as_str(self) -> &'static str {
        match self {
            ContentTransferEncoding::Base64 => "base64",
            ContentTransferEncoding::QuotedPrintable => "quoted-printable",
            ContentTransferEncoding::EightBit => "8bit",
            ContentTransferEncoding::SevenBit => "7bit",
        }
    }
}
38
39/// Pick the canonical CTE for `body`.
40///
41/// Heuristic, in order:
42/// 1. Embedded NUL or > 15 % non-printable bytes → `Base64`
43///    (treated as binary).
44/// 2. Any byte > 0x7F **or** any line longer than 78 chars →
45///    `QuotedPrintable` (text but needs wrapping / escaping).
46/// 3. Otherwise `SevenBit` (pure ASCII, short lines).
47pub fn choose_cte(body: &[u8]) -> ContentTransferEncoding {
48    if body.is_empty() {
49        return ContentTransferEncoding::SevenBit;
50    }
51    // "Non-text" bytes for the base64-vs-qp decision: ASCII
52    // control characters other than \t / \r / \n. We deliberately
53    // do NOT count > 0x7F here — utf-8 encoded text is high-bit-
54    // heavy but still text-shaped and should ride quoted-printable,
55    // not base64.
56    let mut control_bytes = 0usize;
57    let mut has_high_bit = false;
58    let mut max_line = 0usize;
59    let mut cur_line = 0usize;
60    let mut has_nul = false;
61    for &b in body {
62        if b == 0 {
63            has_nul = true;
64        }
65        if b > 0x7F {
66            has_high_bit = true;
67        }
68        let is_control = b < 0x20 && b != b'\t' && b != b'\r' && b != b'\n';
69        if is_control || b == 0x7F {
70            control_bytes += 1;
71        }
72        if b == b'\n' {
73            if cur_line > max_line {
74                max_line = cur_line;
75            }
76            cur_line = 0;
77        } else {
78            cur_line += 1;
79        }
80    }
81    if cur_line > max_line {
82        max_line = cur_line;
83    }
84
85    if has_nul || (!body.is_empty() && control_bytes * 100 / body.len() > 15) {
86        return ContentTransferEncoding::Base64;
87    }
88    if has_high_bit || max_line > 78 {
89        return ContentTransferEncoding::QuotedPrintable;
90    }
91    ContentTransferEncoding::SevenBit
92}
93
/// Encode `body` as quoted-printable (RFC 2045 §6.7).
///
/// * `\r\n` and bare `\n` in the input become a hard `\r\n` break.
/// * Bytes 33..=60 and 62..=126 are copied as-is; space and tab are
///   copied as-is unless they sit directly before a line break or the
///   end of input, in which case they are escaped.
/// * Every other byte (including `=` and a bare `\r`) is written as
///   `=XX` with upper-case hex digits.
/// * A soft break (`=\r\n`) is inserted whenever the next unit would
///   push a line past 75 characters, so no output line exceeds 76
///   characters including the trailing `=`.
///
/// The input is handled as raw bytes; the returned string is ASCII.
pub fn encode_quoted_printable(body: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Room left for the "=" of a soft break within the 76-char limit.
    const MAX_CONTENT_COLS: usize = 75;

    let mut encoded = String::with_capacity(body.len() + body.len() / 3);
    let mut col = 0usize;
    let mut pos = 0usize;

    while let Some(&byte) = body.get(pos) {
        let following = body.get(pos + 1).copied();

        // Hard line breaks: CRLF is consumed as a pair, bare LF alone.
        let is_crlf = byte == b'\r' && following == Some(b'\n');
        if is_crlf || byte == b'\n' {
            encoded.push_str("\r\n");
            col = 0;
            pos += if is_crlf { 2 } else { 1 };
            continue;
        }
        pos += 1;

        // Decide whether this byte may appear verbatim.
        let verbatim = match byte {
            // Whitespace is only safe when something printable follows
            // it on the same line.
            b' ' | b'\t' => !matches!(following, None | Some(b'\r' | b'\n')),
            33..=60 | 62..=126 => true,
            _ => false,
        };
        let width = if verbatim { 1 } else { 3 };

        if col + width > MAX_CONTENT_COLS {
            encoded.push_str("=\r\n");
            col = 0;
        }

        if verbatim {
            encoded.push(char::from(byte));
        } else {
            encoded.push('=');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
        col += width;
    }
    encoded
}
153
154/// Base64-encode `body` with RFC 2045 §6.8 line breaks every 76
155/// chars.
156pub fn encode_base64(body: &[u8]) -> String {
157    let encoded = base64::engine::general_purpose::STANDARD.encode(body);
158    let mut out = String::with_capacity(encoded.len() + encoded.len() / 76 * 2);
159    let bytes = encoded.as_bytes();
160    let mut idx = 0;
161    while idx < bytes.len() {
162        let end = (idx + 76).min(bytes.len());
163        out.push_str(std::str::from_utf8(&bytes[idx..end]).unwrap());
164        out.push_str("\r\n");
165        idx = end;
166    }
167    out
168}
169
/// Fold a header value at 78 chars per RFC 5322 §2.2.3 (soft wrap
/// with CRLF + WSP continuation). The `name: ` prefix is included in
/// the first line's width budget; continuation lines start with a
/// single ASCII space.
///
/// Returns `name: value` unchanged when it fits within the limit and
/// `value` contains neither `\r` nor `\n`. Otherwise the value is
/// split on whitespace and re-joined with single spaces, folding
/// before any token that would overflow the current line. As a
/// consequence, runs of whitespace are collapsed and any CR / LF
/// embedded in `value` is dropped on that path, so the only line
/// breaks in the output are the folds inserted here.
///
/// Folding happens only between tokens: a token longer than the
/// limit is never broken, and the first token always stays on the
/// `name: ` line. Lengths are measured in bytes.
pub fn fold_header(name: &str, value: &str) -> String {
    const SOFT_LIMIT: usize = 78;
    const SEPARATOR: &str = ": ";

    let prefix_len = name.len() + SEPARATOR.len();
    let mut out = String::with_capacity(prefix_len + value.len() + 8);
    out.push_str(name);
    out.push_str(SEPARATOR);

    // A bare CR must not reach the output verbatim any more than an
    // LF: both send the value through the whitespace-normalising path.
    let has_line_break = value.bytes().any(|b| matches!(b, b'\r' | b'\n'));
    if prefix_len + value.len() <= SOFT_LIMIT && !has_line_break {
        out.push_str(value);
        return out;
    }

    let mut line_len = prefix_len;
    let mut line_is_empty = true;
    for tok in value.split_whitespace() {
        if !line_is_empty {
            if line_len + 1 + tok.len() > SOFT_LIMIT {
                // Fold: the continuation line starts with one SP.
                out.push_str("\r\n ");
                line_len = 1;
            } else {
                out.push(' ');
                line_len += 1;
            }
        }
        out.push_str(tok);
        line_len += tok.len();
        line_is_empty = false;
    }
    out
}
209
210/// RFC 2047 encoded-word for header values that contain non-ASCII
211/// bytes. ASCII-only inputs pass through unchanged. Quoting around
212/// encoded-words inside structured headers (display-names in
213/// `From:`, `To:`, etc.) is the caller's responsibility.
214pub fn maybe_encode_word(value: &str) -> std::borrow::Cow<'_, str> {
215    if value.is_ascii() {
216        std::borrow::Cow::Borrowed(value)
217    } else {
218        mailrs_rfc2047::encode(value)
219    }
220}
221
#[cfg(test)]
mod tests {
    //! Unit tests for the encoding helpers, grouped by the function
    //! they exercise.

    use super::*;

    // ---- choose_cte ----------------------------------------------

    /// An empty body needs no encoding at all.
    #[test]
    fn cte_empty_is_sevenbit() {
        let picked = choose_cte(&[]);
        assert_eq!(picked, ContentTransferEncoding::SevenBit);
    }

    /// Short CRLF-terminated ASCII lines stay 7bit.
    #[test]
    fn cte_short_ascii_is_sevenbit() {
        let picked = choose_cte(b"hello world\r\nshort line\r\n");
        assert_eq!(picked, ContentTransferEncoding::SevenBit);
    }

    /// A 120-character ASCII line is too long to send verbatim.
    #[test]
    fn cte_long_ascii_line_is_qp() {
        let mut body = "x".repeat(120);
        body.push_str("\r\n");
        let picked = choose_cte(body.as_bytes());
        assert_eq!(picked, ContentTransferEncoding::QuotedPrintable);
    }

    /// UTF-8 text counts as text, not binary.
    #[test]
    fn cte_high_bit_is_qp() {
        let picked = choose_cte("こんにちは".as_bytes());
        assert_eq!(picked, ContentTransferEncoding::QuotedPrintable);
    }

    /// Every possible byte value once: contains NUL and many controls.
    #[test]
    fn cte_binary_is_base64() {
        let every_byte = (u8::MIN..=u8::MAX).collect::<Vec<u8>>();
        assert_eq!(choose_cte(&every_byte), ContentTransferEncoding::Base64);
    }

    /// A single NUL is enough to force base64.
    #[test]
    fn cte_embedded_nul_is_base64() {
        let picked = choose_cte(b"hello\x00world");
        assert_eq!(picked, ContentTransferEncoding::Base64);
    }

    // ---- encode_quoted_printable ---------------------------------

    /// Printable ASCII with CRLF breaks comes out unchanged.
    #[test]
    fn qp_pass_through_ascii() {
        let input = b"hello world\r\nsecond\r\n";
        assert_eq!(encode_quoted_printable(input), "hello world\r\nsecond\r\n");
    }

    /// `=` is the escape character and must itself be escaped.
    #[test]
    fn qp_escapes_equals_sign() {
        let encoded = encode_quoted_printable(b"a=b");
        assert_eq!(encoded, "a=3Db");
    }

    /// é is the two bytes 0xC3 0xA9 in UTF-8; both get escaped.
    #[test]
    fn qp_escapes_high_bit() {
        let encoded = encode_quoted_printable("é".as_bytes());
        assert_eq!(encoded, "=C3=A9");
    }

    /// A space at the very end of the input counts as end-of-line.
    #[test]
    fn qp_escapes_trailing_space() {
        let encoded = encode_quoted_printable(b"hello ");
        assert_eq!(encoded, "hello=20");
    }

    /// No output line may exceed 76 chars, trailing "=" included.
    #[test]
    fn qp_wraps_long_lines() {
        let input = "x".repeat(200);
        let encoded = encode_quoted_printable(input.as_bytes());
        for line in encoded.split("\r\n") {
            assert!(line.len() <= 76, "line over 76: {line:?}");
        }
    }

    // ---- encode_base64 -------------------------------------------

    /// No base64 output line may exceed 76 chars.
    #[test]
    fn base64_wraps_at_76() {
        let encoded = encode_base64(&[0xAB; 200]);
        let without_final_break = encoded.trim_end_matches("\r\n");
        for line in without_final_break.split("\r\n") {
            assert!(line.len() <= 76, "line over 76: {line:?}");
        }
    }

    // ---- fold_header ---------------------------------------------

    /// A header that fits on one line is not folded.
    #[test]
    fn fold_short_header_unchanged() {
        let folded = fold_header("Subject", "Hello world");
        assert_eq!(folded, "Subject: Hello world");
        assert!(!folded.contains('\n'));
    }

    /// A long value is folded into lines of at most 78 chars, each
    /// continuation starting with folding whitespace.
    #[test]
    fn fold_long_subject_wraps() {
        let value = "the quick brown fox jumps over the lazy dog and the slothful zebra and the gallant elephant";
        let folded = fold_header("Subject", value);

        for line in folded.split("\r\n") {
            assert!(line.len() <= 78, "line over 78: {line:?}");
        }
        // The header name stays at the start of the first line.
        assert!(folded.starts_with("Subject: "));
        // Everything after the first line is a continuation.
        for p in folded.split("\r\n").skip(1) {
            assert!(p.starts_with(' '), "continuation must start with WSP: {p:?}");
        }
    }

    // ---- maybe_encode_word ---------------------------------------

    /// ASCII input is returned untouched.
    #[test]
    fn maybe_encode_word_ascii_pass_through() {
        assert_eq!(maybe_encode_word("Hello world"), "Hello world");
    }

    /// Non-ASCII input is wrapped in a UTF-8 encoded-word (B or Q form).
    #[test]
    fn maybe_encode_word_non_ascii_uses_encoded_word() {
        let encoded = maybe_encode_word("こんにちは");
        assert!(encoded.starts_with("=?UTF-8?"));
        assert!(encoded.ends_with("?="));
    }
}
346}