email_encoding/body/
chooser.rs1use std::mem;
2
3use super::{Encoding, StrOrBytes};
4
/// Coarse classification of an input body, produced by `StrOrBytes::kind` and
/// matched on by `Encoding::choose_impl`.
enum InputKind {
    /// The input reported `true` from `is_ascii()`, whichever variant held it.
    Ascii,
    /// Non-ASCII input held in the `StrOrBytes::Str` variant.
    Utf8,
    /// Non-ASCII input held in the `StrOrBytes::Bytes` variant.
    Binary,
}
10
11impl<'a> StrOrBytes<'a> {
12 fn kind(&self) -> InputKind {
13 if self.is_ascii() {
14 InputKind::Ascii
15 } else {
16 match self {
17 Self::Str(_) => InputKind::Utf8,
18 Self::Bytes(_) => InputKind::Binary,
19 }
20 }
21 }
22}
23
24impl Encoding {
25 pub fn choose<'a>(input: impl Into<StrOrBytes<'a>>, supports_utf8: bool) -> Self {
76 let input = input.into();
77 Self::choose_impl(input, supports_utf8)
78 }
79
80 fn choose_impl(input: StrOrBytes<'_>, supports_utf8: bool) -> Self {
81 let line_too_long = line_too_long(&input);
82
83 match (input.kind(), line_too_long, supports_utf8) {
84 (InputKind::Ascii, false, _) => {
85 Self::SevenBit
87 }
88 (InputKind::Ascii, true, _) => {
89 quoted_printable_or_base64(&input)
91 }
92 (InputKind::Utf8, false, true) => {
93 Self::EightBit
95 }
96 (InputKind::Utf8, true, true) => {
97 quoted_printable_or_base64(&input)
99 }
100 (InputKind::Utf8, _, false) => {
101 quoted_printable_or_base64(&input)
103 }
104 (InputKind::Binary, _, _) => {
105 Self::Base64
107 }
108 }
109 }
110}
111
112fn line_too_long(b: &[u8]) -> bool {
113 let mut last = 0;
114 memchr::memchr_iter(b'\n', b).any(|i| {
115 let last_ = mem::replace(&mut last, i);
116 (i - last_) >= 76
117 }) || (b.len() - last) >= 76
118}
119
120fn quoted_printable_or_base64(b: &[u8]) -> Encoding {
121 if quoted_printable_efficient(b) {
122 Encoding::QuotedPrintable
123 } else {
124 Encoding::Base64
125 }
126}
127
/// Heuristic deciding whether quoted-printable is a sensible encoding for `b`.
///
/// Returns `true` when the number of bytes that would have to be escaped is
/// at most `b.len() / 3` (integer division). A byte is treated as literal
/// when it is a tab, a space, or printable ASCII (`!` through `~`) other than
/// `=`. The `=` byte introduces an escape sequence in quoted-printable, so it
/// can never be emitted as-is and is counted as requiring escaping.
fn quoted_printable_efficient(b: &[u8]) -> bool {
    let requiring_escaping = b
        .iter()
        // The two ranges cover ' '..='~' with '=' (0x3D) cut out.
        .filter(|&&byte| !matches!(byte, b'\t' | b' '..=b'<' | b'>'..=b'~'))
        .count();
    requiring_escaping <= b.len() / 3
}
135
#[cfg(test)]
mod tests {
    use super::{line_too_long, Encoding};

    /// A single 80-byte ASCII line; `too_long_oneline` asserts it is flagged.
    const LONG_LINE: &str =
        "01234567899876543210012345678998765432100123456789987654321001234567899876543210";

    /// Three `\n`-separated ASCII lines with `LONG_LINE` in the middle.
    fn text_with_long_line() -> String {
        ["0123", LONG_LINE, "4567"].join("\n")
    }

    // --- Encoding::choose: ASCII input ---

    #[test]
    fn ascii_short_str() {
        assert_eq!(Encoding::choose("0123", false), Encoding::SevenBit);
    }

    #[test]
    fn ascii_long_str() {
        let text = text_with_long_line();

        assert_eq!(
            Encoding::choose(text.as_str(), false),
            Encoding::QuotedPrintable
        );
    }

    #[test]
    fn ascii_short_binary() {
        assert_eq!(Encoding::choose(b"0123", false), Encoding::SevenBit);
    }

    #[test]
    fn ascii_long_binary() {
        let text = text_with_long_line();

        assert_eq!(
            Encoding::choose(text.as_bytes(), false),
            Encoding::QuotedPrintable
        );
    }

    // --- Encoding::choose: non-ASCII text ---

    #[test]
    fn utf8_short_str_supported() {
        assert_eq!(Encoding::choose("0123 📬", true), Encoding::EightBit);
    }

    #[test]
    fn utf8_short_str_unsupported_efficient() {
        assert_eq!(
            Encoding::choose("01234567899876543210 📬", false),
            Encoding::QuotedPrintable
        );
    }

    #[test]
    fn utf8_short_str_unsupported_inefficient() {
        assert_eq!(Encoding::choose("0123 📬", false), Encoding::Base64);
    }

    #[test]
    fn utf8_long_str_efficient() {
        assert_eq!(Encoding::choose(LONG_LINE, true), Encoding::QuotedPrintable);
    }

    #[test]
    fn utf8_long_str_inefficient() {
        let mostly_emoji = "0123 📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬📬";

        assert_eq!(Encoding::choose(mostly_emoji, true), Encoding::Base64);
    }

    // --- Encoding::choose: non-ASCII bytes ---

    #[test]
    fn binary() {
        assert_eq!(
            Encoding::choose(&[255, 234, b'A', b'C', 210], false),
            Encoding::Base64
        );
    }

    // --- line_too_long ---

    #[test]
    fn not_too_long_oneline() {
        assert!(!line_too_long(b"0123"));
    }

    #[test]
    fn not_too_long_multiline() {
        let short_lines = b"0123\n4567\n00000000000000000000000000000000000000000\n89";

        assert!(!line_too_long(short_lines));
    }

    #[test]
    fn too_long_oneline() {
        assert!(line_too_long(LONG_LINE.as_bytes()));
    }

    #[test]
    fn too_long_multiline() {
        let text = text_with_long_line();

        assert!(line_too_long(text.as_bytes()));
    }
}