email_encoding/body/
chooser.rs

1use std::mem;
2
3use super::{Encoding, StrOrBytes};
4
/// Coarse classification of an input, used to pick an `Encoding`.
enum InputKind {
    /// Every byte of the input is ascii
    Ascii,
    /// Non-ascii input that was handed over as a string
    Utf8,
    /// Non-ascii input that was handed over as raw bytes
    Binary,
}
10
11impl<'a> StrOrBytes<'a> {
12    fn kind(&self) -> InputKind {
13        if self.is_ascii() {
14            InputKind::Ascii
15        } else {
16            match self {
17                Self::Str(_) => InputKind::Utf8,
18                Self::Bytes(_) => InputKind::Binary,
19            }
20        }
21    }
22}
23
24impl Encoding {
25    /// Choose the most efficient `Encoding` for `input`
26    ///
27    /// Look into `input` and decide what encoding format could best
28    /// be used to represent it.
29    ///
30    /// If the SMTP server supports the `SMTPUTF8` extension
31    /// `supports_utf8` _may_ me set to `true`, otherwise `false`
32    /// is the safest option.
33    ///
34    /// Possible return values based on `supports_utf8`
35    ///
36    /// | `Encoding`         | `false` | `true` |
37    /// | ------------------ | ------- | ------ |
38    /// | `7bit`             | ✅      | ✅     |
39    /// | `8bit`             | ❌      | ✅     |
40    /// | `quoted-printable` | ✅      | ✅     |
41    /// | `base64`           | ✅      | ✅     |
42    ///
43    /// # Examples
44    ///
45    /// ```rust
46    /// # use email_encoding::body::Encoding;
47    /// // Ascii
48    /// {
49    ///     let input = "Hello, World!";
50    ///     assert_eq!(Encoding::choose(input, false), Encoding::SevenBit);
51    ///     assert_eq!(Encoding::choose(input, true), Encoding::SevenBit);
52    /// }
53    ///
54    /// // Mostly ascii + utf-8
55    /// {
56    ///     let input = "Hello, World! 📬";
57    ///     assert_eq!(Encoding::choose(input, false), Encoding::QuotedPrintable);
58    ///     assert_eq!(Encoding::choose(input, true), Encoding::EightBit);
59    /// }
60    ///
61    /// // Mostly utf-8
62    /// {
63    ///     let input = "Hello! 📬📬📬📬📬📬📬📬📬📬";
64    ///     assert_eq!(Encoding::choose(input, false), Encoding::Base64);
65    ///     assert_eq!(Encoding::choose(input, true), Encoding::EightBit);
66    /// }
67    ///
68    /// // Non utf-8 bytes
69    /// {
70    ///     let input = &[255, 35, 123, 190];
71    ///     assert_eq!(Encoding::choose(input, false), Encoding::Base64);
72    ///     assert_eq!(Encoding::choose(input, true), Encoding::Base64);
73    /// }
74    /// ```
75    pub fn choose<'a>(input: impl Into<StrOrBytes<'a>>, supports_utf8: bool) -> Self {
76        let input = input.into();
77        Self::choose_impl(input, supports_utf8)
78    }
79
80    fn choose_impl(input: StrOrBytes<'_>, supports_utf8: bool) -> Self {
81        let line_too_long = line_too_long(&input);
82
83        match (input.kind(), line_too_long, supports_utf8) {
84            (InputKind::Ascii, false, _) => {
85                // Input is ascii and fits the maximum line length
86                Self::SevenBit
87            }
88            (InputKind::Ascii, true, _) => {
89                // Input is ascii but doesn't fix the maximum line length
90                quoted_printable_or_base64(&input)
91            }
92            (InputKind::Utf8, false, true) => {
93                // Input is utf-8, line fits, the server supports it
94                Self::EightBit
95            }
96            (InputKind::Utf8, true, true) => {
97                // Input is utf-8, line doesn't fit, the server supports it
98                quoted_printable_or_base64(&input)
99            }
100            (InputKind::Utf8, _, false) => {
101                // Input is utf-8, the server doesn't support it
102                quoted_printable_or_base64(&input)
103            }
104            (InputKind::Binary, _, _) => {
105                // Input is binary
106                Self::Base64
107            }
108        }
109    }
110}
111
112fn line_too_long(b: &[u8]) -> bool {
113    let mut last = 0;
114    memchr::memchr_iter(b'\n', b).any(|i| {
115        let last_ = mem::replace(&mut last, i);
116        (i - last_) >= 76
117    }) || (b.len() - last) >= 76
118}
119
120fn quoted_printable_or_base64(b: &[u8]) -> Encoding {
121    if quoted_printable_efficient(b) {
122        Encoding::QuotedPrintable
123    } else {
124        Encoding::Base64
125    }
126}
127
/// Estimate whether `quoted-printable` is an efficient encoding for `b`.
///
/// Every byte other than a tab or a byte in the ` `..=`~` ascii range
/// is counted as requiring escaping. Returns `true` when at most
/// `b.len() / 3` (integer division) of the bytes require it.
fn quoted_printable_efficient(b: &[u8]) -> bool {
    // Tab, space and the graphic ascii characters (`!`..=`~`)
    let is_literal = |byte: u8| byte == b'\t' || byte == b' ' || byte.is_ascii_graphic();

    let budget = b.len() / 3; // 33.33% or less
    let escaped = b.iter().filter(|&&byte| !is_literal(byte)).count();
    escaped <= budget
}
135
#[cfg(test)]
mod tests {
    use super::{line_too_long, Encoding};

    /// A single 80 bytes long ascii line
    const LONG_LINE: &str =
        "01234567899876543210012345678998765432100123456789987654321001234567899876543210";

    /// A short line, followed by `LONG_LINE`, followed by another short line
    fn multiline_with_long_line() -> String {
        ["0123", LONG_LINE, "4567"].join("\n")
    }

    // `Encoding::choose` with ascii input

    #[test]
    fn ascii_short_str() {
        assert_eq!(Encoding::choose("0123", false), Encoding::SevenBit);
    }

    #[test]
    fn ascii_long_str() {
        let input = multiline_with_long_line();

        assert_eq!(
            Encoding::choose(input.as_str(), false),
            Encoding::QuotedPrintable
        );
    }

    #[test]
    fn ascii_short_binary() {
        assert_eq!(Encoding::choose(b"0123", false), Encoding::SevenBit);
    }

    #[test]
    fn ascii_long_binary() {
        let input = multiline_with_long_line();

        assert_eq!(
            Encoding::choose(input.as_bytes(), false),
            Encoding::QuotedPrintable
        );
    }

    // `Encoding::choose` with utf-8 input

    #[test]
    fn utf8_short_str_supported() {
        assert_eq!(Encoding::choose("0123 📬", true), Encoding::EightBit);
    }

    #[test]
    fn utf8_short_str_unsupported_efficient() {
        assert_eq!(
            Encoding::choose("01234567899876543210 📬", false),
            Encoding::QuotedPrintable
        );
    }

    #[test]
    fn utf8_short_str_unsupported_inefficient() {
        assert_eq!(Encoding::choose("0123 📬", false), Encoding::Base64);
    }

    #[test]
    fn utf8_long_str_efficient() {
        assert_eq!(
            Encoding::choose(LONG_LINE, true),
            Encoding::QuotedPrintable
        );
    }

    #[test]
    fn utf8_long_str_inefficient() {
        let input = "0123 📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬";

        assert_eq!(Encoding::choose(input, true), Encoding::Base64);
    }

    // `Encoding::choose` with binary input

    #[test]
    fn binary() {
        assert_eq!(
            Encoding::choose(&[255, 234, b'A', b'C', 210], false),
            Encoding::Base64
        );
    }

    // `line_too_long`

    #[test]
    fn not_too_long_oneline() {
        assert!(!line_too_long(b"0123"));
    }

    #[test]
    fn not_too_long_multiline() {
        let input = "0123\n4567\n00000000000000000000000000000000000000000\n89";

        assert!(!line_too_long(input.as_bytes()));
    }

    #[test]
    fn too_long_oneline() {
        assert!(line_too_long(LONG_LINE.as_bytes()));
    }

    #[test]
    fn too_long_multiline() {
        let input = multiline_with_long_line();

        assert!(line_too_long(input.as_bytes()));
    }
}