1#![doc = include_str!("../README.md")]
2#![deny(missing_docs)]
3#![deny(rustdoc::broken_intra_doc_links)]
4
5use std::borrow::Cow;
11
12use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
13
14pub fn encode(input: &str) -> Cow<'_, str> {
36 if input.is_ascii() && !input.as_bytes().windows(2).any(|w| w == b"=?") {
43 return Cow::Borrowed(input);
44 }
45 let encoded = B64.encode(input.as_bytes());
46 let mut out = String::with_capacity(12 + encoded.len());
49 out.push_str("=?UTF-8?B?");
50 out.push_str(&encoded);
51 out.push_str("?=");
52 Cow::Owned(out)
53}
54
55pub fn decode(input: &[u8]) -> Cow<'_, str> {
85 if !contains_encoded_word(input) {
88 return match std::str::from_utf8(input) {
89 Ok(s) => Cow::Borrowed(s),
90 Err(_) => Cow::Owned(String::from_utf8_lossy(input).into_owned()),
91 };
92 }
93
94 let mut out = String::with_capacity(input.len());
95 let mut cursor = 0usize;
96 let mut last_was_encoded = false;
97 let mut pending_ws_start: Option<usize> = None;
98
99 while cursor < input.len() {
100 match find_encoded_word_start(input, cursor) {
101 Some(start) => {
102 if start > cursor {
104 let raw = &input[cursor..start];
105 if last_was_encoded && raw.iter().all(|&b| matches!(b, b' ' | b'\t')) {
110 pending_ws_start = Some(start); } else {
112 if let Some(ws_start) = pending_ws_start {
117 let _ = ws_start;
121 pending_ws_start = None;
122 }
123 push_lossy(&mut out, raw);
124 }
125 }
126 match find_encoded_word_end(input, start) {
127 Some((charset, encoding, text, end)) => {
128 decode_encoded_word(&mut out, charset, encoding, text);
129 cursor = end;
130 last_was_encoded = true;
131 pending_ws_start = None;
132 }
133 None => {
134 out.push('=');
137 out.push('?');
138 cursor = start + 2;
139 last_was_encoded = false;
140 }
141 }
142 }
143 None => {
144 let raw = &input[cursor..];
145 push_lossy(&mut out, raw);
146 break;
147 }
148 }
149 }
150
151 Cow::Owned(out)
152}
153
/// Cheap pre-check: reports whether the `=?` lead-in occurs anywhere in
/// `input`. It does not validate that a complete encoded-word follows.
fn contains_encoded_word(input: &[u8]) -> bool {
    input.windows(2).any(|pair| pair == b"=?")
}
166
/// Returns the absolute index of the first `=?` at or after `from`, or `None`
/// when there is none (including when `from` is at or past the end).
fn find_encoded_word_start(input: &[u8], from: usize) -> Option<usize> {
    let tail = input.get(from..)?;
    let offset = tail.windows(2).position(|pair| pair == b"=?")?;
    Some(from + offset)
}
179
/// Parses the encoded-word whose `=?` lead-in begins at `start`.
///
/// On success returns `(charset, encoding, text, end)` where:
/// * `charset` is the charset label with any RFC 2231 `*language` suffix
///   removed (`iso-8859-1*en` yields `iso-8859-1`),
/// * `encoding` is the raw marker byte (`B`, `b`, `Q` or `q`),
/// * `text` is the still-encoded payload between the third `?` and `?=`,
/// * `end` is the index just past the closing `?=`.
///
/// Returns `None` when the candidate is malformed: the charset is empty, the
/// encoding marker is not B/Q, a `?` delimiter is missing, or no closing
/// `?=` exists.
fn find_encoded_word_end(input: &[u8], start: usize) -> Option<(&[u8], u8, &[u8], usize)> {
    let charset_start = start + 2;
    let tail = input.get(charset_start..)?;

    // The label runs up to the first `?` after the lead-in.
    let q1 = tail.iter().position(|&b| b == b'?')?;
    let label = &tail[..q1];
    // RFC 2231 section 5 allows `charset*language`; the language tag is not
    // part of the charset name and would otherwise defeat the charset lookup.
    let charset = match label.iter().position(|&b| b == b'*') {
        Some(star) => &label[..star],
        None => label,
    };
    if charset.is_empty() {
        return None;
    }

    let encoding = *tail.get(q1 + 1)?;
    if !matches!(encoding, b'B' | b'b' | b'Q' | b'q') {
        return None;
    }
    if tail.get(q1 + 2) != Some(&b'?') {
        return None;
    }

    // The delimiter at `q1 + 2` exists, so `text_start <= input.len()`.
    let text_start = charset_start + q1 + 3;
    let text_region = &input[text_start..];
    let close = text_region.windows(2).position(|pair| pair == b"?=")?;
    Some((charset, encoding, &text_region[..close], text_start + close + 2))
}
220
221fn decode_encoded_word(out: &mut String, charset: &[u8], encoding: u8, text: &[u8]) {
224 let raw_bytes = match encoding {
225 b'B' | b'b' => match B64.decode(text) {
226 Ok(b) => b,
227 Err(_) => {
228 push_lossy(out, text);
230 return;
231 }
232 },
233 b'Q' | b'q' => decode_q(text),
234 _ => return,
235 };
236 convert_to_utf8(out, charset, &raw_bytes);
237}
238
239fn decode_q(text: &[u8]) -> Vec<u8> {
245 let mut out = Vec::with_capacity(text.len());
246 let mut i = 0;
247 while i < text.len() {
248 match text[i] {
249 b'_' => {
250 out.push(b' ');
251 i += 1;
252 }
253 b'=' if i + 2 < text.len() => {
254 let hi = hex_nibble(text[i + 1]);
255 let lo = hex_nibble(text[i + 2]);
256 match (hi, lo) {
257 (Some(h), Some(l)) => {
258 out.push((h << 4) | l);
259 i += 3;
260 }
261 _ => {
262 out.push(b'=');
264 i += 1;
265 }
266 }
267 }
268 _ => {
269 out.push(text[i]);
270 i += 1;
271 }
272 }
273 }
274 out
275}
276
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`;
/// any other byte yields `None`.
#[inline]
fn hex_nibble(b: u8) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so the narrowing cast is lossless.
    char::from(b).to_digit(16).map(|digit| digit as u8)
}
286
287fn convert_to_utf8(out: &mut String, charset: &[u8], bytes: &[u8]) {
289 let encoding = encoding_rs::Encoding::for_label(charset);
290 let encoding = encoding.unwrap_or(encoding_rs::UTF_8);
291 let (cow, _, _) = encoding.decode(bytes);
292 out.push_str(&cow);
293}
294
/// Appends `bytes` to `out` as text, replacing every invalid UTF-8 sequence
/// with U+FFFD.
fn push_lossy(out: &mut String, bytes: &[u8]) {
    // `from_utf8_lossy` borrows when the input is already valid UTF-8, so the
    // common case does not allocate.
    let text = String::from_utf8_lossy(bytes);
    out.push_str(text.as_ref());
}
302
#[cfg(test)]
mod tests {
    // Unit tests for `encode` and `decode`. Fixtures are literal header
    // bytes; expected values are the exact decoded strings.
    use super::*;

    // ---- encode: pass-through and UTF-8/base64 wrapping ----

    // Pure ASCII without `=?` must come back unchanged and unallocated.
    #[test]
    fn encode_ascii_is_borrowed() {
        let r = encode("Hello World");
        assert_eq!(r, "Hello World");
        assert!(matches!(r, Cow::Borrowed(_)));
    }

    // Non-ASCII input is wrapped as a single UTF-8 / base64 encoded-word.
    #[test]
    fn encode_japanese() {
        let r = encode("日本語");
        assert_eq!(r, "=?UTF-8?B?5pel5pys6Kqe?=");
    }

    // Whatever `encode` produces, `decode` must turn back into the input.
    #[test]
    fn encode_roundtrip_via_decode() {
        let original = "café — 日本語 — émoji 🦀";
        let encoded = encode(original);
        let decoded = decode(encoded.as_bytes());
        assert_eq!(decoded, original);
    }

    // The empty string counts as ASCII and is passed through borrowed.
    #[test]
    fn encode_empty_string() {
        let r = encode("");
        assert_eq!(r, "");
        assert!(matches!(r, Cow::Borrowed(_)));
    }

    // Characters outside the BMP (4-byte UTF-8) survive a round trip.
    #[test]
    fn encode_pure_emoji() {
        let r = encode("🦀🚀");
        assert!(r.starts_with("=?UTF-8?B?"));
        assert!(r.ends_with("?="));
        let decoded = decode(r.as_bytes());
        assert_eq!(decoded, "🦀🚀");
    }

    // ---- decode: input without encoded-words ----

    // No `=?` in the input: the original bytes are borrowed, not copied.
    #[test]
    fn plain_ascii_is_borrowed() {
        let r = decode(b"hello world");
        assert_eq!(r, "hello world");
        assert!(matches!(r, Cow::Borrowed(_)));
    }

    // Raw (unencoded) UTF-8 is also borrowed as long as it is valid.
    #[test]
    fn utf8_no_encoding_returns_borrowed() {
        let r = decode("héllo".as_bytes());
        assert_eq!(r, "héllo");
        assert!(matches!(r, Cow::Borrowed(_)));
    }

    // ---- decode: B and Q transfer encodings ----

    #[test]
    fn base64_utf8() {
        let r = decode(b"=?UTF-8?B?VGVzdA==?=");
        assert_eq!(r, "Test");
    }

    // `=20` is the hex escape for a space.
    #[test]
    fn quoted_printable_utf8() {
        let r = decode(b"=?UTF-8?Q?Hello=20World?=");
        assert_eq!(r, "Hello World");
    }

    // In Q encoding `_` stands for a space.
    #[test]
    fn q_underscore_is_space() {
        let r = decode(b"=?UTF-8?Q?Hello_World?=");
        assert_eq!(r, "Hello World");
    }

    // The encoding marker is accepted in lower case as well.
    #[test]
    fn q_lowercase_encoding_marker() {
        let r = decode(b"=?utf-8?q?ohai?=");
        assert_eq!(r, "ohai");
    }

    #[test]
    fn b_lowercase_encoding_marker() {
        let r = decode(b"=?utf-8?b?dGVzdA==?=");
        assert_eq!(r, "test");
    }

    // ---- decode: non-UTF-8 charsets ----

    #[test]
    fn iso_8859_1() {
        let r = decode(b"=?iso-8859-1?B?Y2Fm6Q==?=");
        assert_eq!(r, "café");
    }

    #[test]
    fn iso_2022_jp_japanese() {
        let r = decode(b"=?ISO-2022-JP?B?GyRCJDMkcyRLJEEkTxsoQg==?=");
        assert_eq!(r, "こんにちは");
    }

    // ---- decode: mixing plain text and encoded-words ----

    // Whitespace between plain text and an encoded-word is significant.
    #[test]
    fn mixed_ascii_and_encoded() {
        let r = decode(b"Prefix =?UTF-8?B?VGVzdA==?= Suffix");
        assert_eq!(r, "Prefix Test Suffix");
    }

    // Whitespace that only separates two encoded-words is dropped.
    #[test]
    fn adjacent_encoded_words_collapse_whitespace() {
        let r = decode(b"=?UTF-8?B?aGVsbG8=?= =?UTF-8?B?d29ybGQ=?=");
        assert_eq!(r, "helloworld");
    }

    // Plain text between two encoded-words keeps its surrounding spaces.
    #[test]
    fn whitespace_preserved_around_ascii_run() {
        let r = decode(b"=?UTF-8?B?aGVsbG8=?= mid =?UTF-8?B?d29ybGQ=?=");
        assert_eq!(r, "hello mid world");
    }

    // ---- decode: malformed encoded-words ----
    // These tests only assert that the literal `=?` lead-in is preserved;
    // they do not pin the rest of the output.

    #[test]
    fn malformed_no_closing_returns_literal_lead_in() {
        let r = decode(b"=?UTF-8?B?VGVzdA");
        assert!(r.starts_with("=?"));
    }

    #[test]
    fn malformed_empty_charset_kept_literal() {
        let r = decode(b"=??B?VGVzdA==?=");
        assert!(r.starts_with("=?"));
    }

    #[test]
    fn malformed_unknown_encoding_kept_literal() {
        let r = decode(b"=?UTF-8?X?garbage?=");
        assert!(r.starts_with("=?"));
    }

    // ---- decode: edge cases ----

    #[test]
    fn empty_input_returns_empty() {
        assert_eq!(decode(b""), "");
    }

    // Invalid bytes are replaced, valid neighbours are kept.
    #[test]
    fn invalid_utf8_in_unencoded_returns_lossy() {
        let r = decode(&[0xFF, 0xFE, b'h', b'i']);
        assert!(r.contains("hi"));
    }

    // `=ZZ` is not a valid hex escape; the text around it must survive.
    #[test]
    fn q_encoding_malformed_hex() {
        let r = decode(b"=?UTF-8?Q?abc=ZZdef?=");
        assert!(r.contains("abc"));
        assert!(r.contains("def"));
    }

    // An unrecognised charset label is treated as UTF-8.
    #[test]
    fn unknown_charset_falls_through_to_utf8() {
        let r = decode(b"=?x-fake-charset?B?aGVsbG8=?=");
        assert_eq!(r, "hello");
    }

    // `=E9` is the single byte 0xE9, which is 'é' in ISO-8859-1.
    #[test]
    fn q_encoding_with_latin1_chars() {
        let r = decode(b"=?iso-8859-1?Q?caf=E9?=");
        assert_eq!(r, "café");
    }

    // An encoded-word with an empty payload decodes to nothing.
    #[test]
    fn empty_encoded_word_body() {
        let r = decode(b"=?UTF-8?B??=");
        assert_eq!(r, "");
    }

    // NOTE(review): despite the `no_collapse` suffix in the name, the
    // assertion expects the separating space to be dropped even though the
    // two words use different charsets.
    #[test]
    fn adjacent_words_different_charsets_no_collapse() {
        let r = decode(b"=?UTF-8?B?aGk=?= =?iso-8859-1?B?aGk=?=");
        assert_eq!(r, "hihi");
    }

    #[test]
    fn encoded_word_at_very_start_of_input() {
        let r = decode(b"=?UTF-8?B?aGVsbG8=?= trailing text");
        assert_eq!(r, "hello trailing text");
    }

    #[test]
    fn encoded_word_at_very_end_of_input() {
        let r = decode(b"leading text =?UTF-8?B?aGVsbG8=?=");
        assert_eq!(r, "leading text hello");
    }

    // Encoded-words are decoded even when wrapped in double quotes.
    #[test]
    fn encoded_word_in_middle_of_quoted_string() {
        let r = decode(b"\"=?UTF-8?B?aGVsbG8=?=\" <addr@example.com>");
        assert!(r.contains("hello"));
        assert!(r.contains("<addr@example.com>"));
    }

    // Charset labels are matched without regard to ASCII case.
    #[test]
    fn charset_case_insensitive_match() {
        let r1 = decode(b"=?UTF-8?B?aGk=?=");
        let r2 = decode(b"=?utf-8?B?aGk=?=");
        let r3 = decode(b"=?Utf-8?B?aGk=?=");
        let r4 = decode(b"=?UtF-8?B?aGk=?=");
        assert_eq!(r1, r2);
        assert_eq!(r2, r3);
        assert_eq!(r3, r4);
    }

    // ---- decode: legacy multi-byte charsets ----

    #[test]
    fn shift_jis_japanese_decode() {
        let r = decode(b"=?Shift_JIS?B?g2WDWINn?=");
        assert_eq!(r, "テスト");
    }

    #[test]
    fn euc_jp_japanese_decode() {
        let r = decode(b"=?EUC-JP?B?pcaluaXI?=");
        assert_eq!(r, "テスト");
    }

    // Only checks that something was produced; the exact text is not pinned.
    #[test]
    fn big5_chinese_decode() {
        let r = decode(b"=?Big5?B?p0GmbA==?=");
        assert!(!r.is_empty());
    }

    // ---- decode: Q-encoding hex escapes ----

    #[test]
    fn q_encoding_uppercase_hex() {
        let r = decode(b"=?UTF-8?Q?=E6=97=A5=E6=9C=AC=E8=AA=9E?=");
        assert_eq!(r, "日本語");
    }

    // Lower-case hex digits are accepted too.
    #[test]
    fn q_encoding_lowercase_hex_tolerated() {
        let r = decode(b"=?UTF-8?Q?=e6=97=a5?=");
        assert_eq!(r, "日");
    }

    // `_` and a `=21` ('!') escape in the same word.
    #[test]
    fn encoded_word_with_underscore_and_equals() {
        let r = decode(b"=?UTF-8?Q?Hello_World=21?=");
        assert_eq!(r, "Hello World!");
    }

    // ---- encode: additional round trips ----

    #[test]
    fn encode_preserves_short_ascii() {
        let r = encode("test");
        assert_eq!(r, "test");
        assert!(matches!(r, Cow::Borrowed(_)));
    }

    // Japanese text goes out as UTF-8 (not ISO-2022-JP) and round-trips.
    #[test]
    fn encode_decode_roundtrip_iso_2022_jp_via_utf8_wrapping() {
        let original = "明日午前9時の会議";
        let encoded = encode(original);
        let decoded = decode(encoded.as_bytes());
        assert_eq!(decoded, original);
    }

    // A single non-ASCII character causes the whole string to be encoded.
    #[test]
    fn encode_string_with_mixed_ascii_and_unicode() {
        let r = encode("Hello 世界");
        assert!(r.starts_with("=?UTF-8?B?"));
        let back = decode(r.as_bytes());
        assert_eq!(back, "Hello 世界");
    }
}