//! Body decoding for parsed MIME parts: transfer-decoding (Base64,
//! Quoted-Printable, UUencode, or identity), optional byte-length truncation,
//! and charset conversion to UTF-8.
use base64::{
alphabet,
engine::{DecodePaddingMode, GeneralPurpose, GeneralPurposeConfig},
Engine as _,
};
/// Lenient base64 engine used for message bodies.
///
/// Built on the standard alphabet with the decode padding mode set to
/// `DecodePaddingMode::Indifferent`, so a body whose trailing `=` padding was
/// dropped by the sending MUA decodes the same way as a canonically padded one.
const BASE64_EMAIL: GeneralPurpose = {
    let config =
        GeneralPurposeConfig::new().with_decode_padding_mode(DecodePaddingMode::Indifferent);
    GeneralPurpose::new(&alphabet::STANDARD, config)
};
use crate::{
error::ParseError,
message::DecodedBodyValue,
part::{ParsedPart, TransferEncoding},
};
/// Decode the body of a parsed part into a UTF-8 string.
///
/// The work is done in three steps:
///
/// 1. Transfer-decoding (Base64, Quoted-Printable, UUencode, or identity for
///    7bit / 8bit / binary). Where possible the *encoded* input is cut short
///    first, so a small preview does not pay for decoding the whole body.
/// 2. Truncation of the decoded bytes to `max_bytes`.
/// 3. Charset conversion to UTF-8 via `encoding_rs`.
///
/// `max_bytes` limits the number of transfer-decoded bytes *before* charset
/// conversion. `is_truncated` in the result is `true` when decoded content
/// exists beyond that limit.
///
/// When the charset is absent, or its label is not recognised, this function
/// falls back to UTF-8 rather than the RFC 2045 §5.2 default of US-ASCII.
/// UTF-8 is a strict superset of ASCII for the 7-bit range and produces more
/// useful output for the modern email corpus, which is overwhelmingly UTF-8.
///
/// A transfer-decoding failure is not an `Err`: the returned value is empty
/// and `is_encoding_problem` is set. Malformed byte sequences found during
/// charset conversion also set `is_encoding_problem`.
///
/// # Errors
///
/// Returns `Err(ParseError::InvalidRange)` when `part.body_range` is out of
/// bounds for `raw`.
pub fn decode_body_value(
    raw: &[u8],
    part: &ParsedPart,
    max_bytes: Option<usize>,
) -> Result<DecodedBodyValue, ParseError> {
    let (offset_u32, length_u32) = part.body_range;
    let offset = offset_u32 as usize;
    let length = length_u32 as usize;
    // `checked_add` guards the arithmetic; `get` rejects any range that does
    // not lie inside `raw`. Both failures map to the same error.
    let body_bytes = offset
        .checked_add(length)
        .and_then(|end| raw.get(offset..end))
        .ok_or(ParseError::InvalidRange {
            offset: offset_u32,
            length: length_u32,
            available: raw.len() as u64,
        })?;

    // Step 1: transfer-decode, pre-truncating input to avoid decoding more
    // than needed. Every path records in `input_was_limited` whether content
    // was left undecoded because of `max_bytes`, so Step 2 knows whether
    // additional content exists beyond the limit.
    let mut is_encoding_problem = false;
    let mut input_was_limited = false;
    let mut decoded: Vec<u8> = match part.transfer_encoding {
        TransferEncoding::Base64 => {
            // Limit base64 input to avoid allocating a full decode buffer when
            // only a preview (max_bytes) is needed. 3 decoded bytes = 4 base64
            // chars; round up to the next multiple of 4 so the cut never falls
            // inside a 4-character group, which would be a spurious decode
            // error.
            let max_b64_chars = max_bytes
                .map(|n| n.saturating_mul(4).div_ceil(3).next_multiple_of(4))
                .unwrap_or(usize::MAX);
            // Drop whitespace, collect up to max_b64_chars bytes, and detect
            // truncation — all in a single pass. RFC 2045 §6.8 requires
            // decoders to ignore line breaks and other characters outside the
            // base64 alphabet, so stray spaces and tabs (not just CR/LF) are
            // skipped: they must neither fail the decode nor count as content
            // beyond the limit.
            let mut stripped = Vec::with_capacity(max_b64_chars.min(body_bytes.len()));
            for &b in body_bytes {
                if b.is_ascii_whitespace() {
                    continue;
                }
                if stripped.len() >= max_b64_chars {
                    input_was_limited = true;
                    break;
                }
                stripped.push(b);
            }
            BASE64_EMAIL.decode(&stripped).unwrap_or_else(|_| {
                is_encoding_problem = true;
                Vec::new()
            })
        }
        TransferEncoding::QuotedPrintable => {
            // `None` means the decoder reported an error.
            let decode_qp = |input: &[u8]| {
                quoted_printable::decode(input, quoted_printable::ParseMode::Robust).ok()
            };
            // Pre-truncate the QP input when only a preview is needed. A QP
            // escape (=XX) spends 3 encoded bytes on 1 decoded byte, so a 4×
            // window comfortably covers `max_bytes` of all-escape content.
            let preview_len = max_bytes.map_or(body_bytes.len(), |n| {
                n.saturating_mul(4).min(body_bytes.len())
            });
            let preview_was_cut = preview_len < body_bytes.len();
            let preview = decode_qp(&body_bytes[..preview_len]);
            // The preview settles the question only when it already yields
            // more than `max_bytes` bytes. Otherwise the cut may be a false
            // positive: soft line breaks (=\r\n) decode to nothing, so the
            // bytes beyond the window may add no output at all. The equality
            // case (preview == n) is inconclusive too — the full output may be
            // exactly n bytes (not truncated) or more (truncated).
            let preview_is_conclusive = match (&preview, max_bytes) {
                (Some(bytes), Some(n)) => bytes.len() > n,
                _ => false,
            };
            let outcome = if preview_was_cut && !preview_is_conclusive {
                // Decode the full body and let Step 2 measure the real
                // length. The preview — including any error it produced — is
                // discarded, so `is_encoding_problem` reflects only the bytes
                // that are actually returned.
                decode_qp(body_bytes)
            } else {
                input_was_limited = preview_was_cut;
                preview
            };
            outcome.unwrap_or_else(|| {
                is_encoding_problem = true;
                Vec::new()
            })
        }
        TransferEncoding::UUEncode => decode_uuencode(
            body_bytes,
            max_bytes,
            &mut is_encoding_problem,
            &mut input_was_limited,
        ),
        TransferEncoding::Identity
        | TransferEncoding::SevenBit
        | TransferEncoding::EightBit
        | TransferEncoding::Binary => {
            // Slice to max_bytes before allocating to avoid copying the full body.
            let limit = max_bytes.map_or(body_bytes.len(), |n| n.min(body_bytes.len()));
            input_was_limited = limit < body_bytes.len();
            body_bytes[..limit].to_vec()
        }
    };

    // Step 2: apply max_bytes to the decoded bytes and determine is_truncated.
    // Either the decoded output itself exceeds max_bytes (possible for Base64
    // and QP, where the input limit is only an approximation), or the
    // encoding path already stopped early and said so via `input_was_limited`.
    let is_truncated = match max_bytes {
        Some(n) if decoded.len() > n => {
            decoded.truncate(n);
            true
        }
        _ => input_was_limited,
    };

    // Step 3: charset conversion to UTF-8 via encoding_rs.
    // Practical default: UTF-8 is more permissive than RFC 2045 §5.2 (us-ascii)
    // but avoids false is_encoding_problem flags on modern charsetless text.
    let charset = part.charset.as_deref().unwrap_or("utf-8");
    let enc = encoding_rs::Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::UTF_8);
    // The decoded text is always valid UTF-8. If truncation cut through a
    // multi-byte source sequence, the decoder substitutes a replacement
    // character and reports `had_errors`, which is folded into
    // `is_encoding_problem`.
    let (text, _, had_errors) = enc.decode(&decoded);
    is_encoding_problem |= had_errors;

    Ok(DecodedBodyValue {
        value: text.into_owned(),
        is_truncated,
        is_encoding_problem,
    })
}
/// Decode a UUencoded body by delegating to `uuencoding::decode_limited`.
///
/// `max_bytes` is forwarded unchanged as the decode limit. The outcome is
/// reported through the two out-parameters, which are only ever set to
/// `true` here and never cleared:
///
/// - The decoder returns an error: `*is_encoding_problem` is set and an
///   empty vector is returned.
/// - The block is marked truncated and the limit was hit:
///   `*input_was_limited` is set.
/// - The block is marked truncated for any other reason:
///   `*is_encoding_problem` is set.
///
/// In the two truncated cases the bytes decoded so far are still returned.
fn decode_uuencode(
    body: &[u8],
    max_bytes: Option<usize>,
    is_encoding_problem: &mut bool,
    input_was_limited: &mut bool,
) -> Vec<u8> {
    // `decode_limited` takes the byte limit itself, so no post-hoc slicing of
    // the decoded data is needed in this function.
    let Ok(block) = uuencoding::decode_limited(body, max_bytes) else {
        *is_encoding_problem = true;
        return Vec::new();
    };

    match (block.is_truncated, block.was_limit_hit) {
        // Complete block: nothing to report.
        (false, _) => {}
        // Stopped early because of `max_bytes`: more content exists.
        (true, true) => *input_was_limited = true,
        // Stopped early without hitting the limit: the input itself is
        // incomplete or malformed.
        (true, false) => *is_encoding_problem = true,
    }

    block.data
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::part::{ParsedPart, TransferEncoding};

    /// Build a synthetic raw buffer with `body_bytes` appended after a fake
    /// header block, and return it with a `ParsedPart` whose `body_range`
    /// covers exactly those body bytes.
    fn make_part(
        body_bytes: &[u8],
        transfer_encoding: TransferEncoding,
        charset: Option<&str>,
    ) -> (Vec<u8>, ParsedPart) {
        let prefix = b"fake-header: x\r\n\r\n";
        let mut raw: Vec<u8> = prefix.to_vec();
        let offset = raw.len();
        raw.extend_from_slice(body_bytes);
        let length = body_bytes.len();
        let part = ParsedPart {
            part_id: "1".to_owned(),
            content_type: "text/plain".to_owned(),
            charset: charset.map(str::to_owned),
            transfer_encoding,
            disposition: None,
            filename: None,
            cid: None,
            header_range: (0u32, offset as u32),
            body_range: (offset as u32, length as u32),
            children: vec![],
            is_encoding_problem: false,
        };
        (raw, part)
    }

    #[test]
    fn test_base64_body() {
        // Oracle: base64("Hello, World!") == "SGVsbG8sIFdvcmxkIQ=="
        let b64 = b"SGVsbG8sIFdvcmxkIQ==";
        let (raw, part) = make_part(b64, TransferEncoding::Base64, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello, World!");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_base64_missing_padding_tolerant() {
        // Oracle: base64("Hello, World!") == "SGVsbG8sIFdvcmxkIQ=="
        // Many MUAs strip trailing '=' padding. The forgiving engine must
        // decode this without setting is_encoding_problem.
        let b64_no_pad = b"SGVsbG8sIFdvcmxkIQ";
        let (raw, part) = make_part(b64_no_pad, TransferEncoding::Base64, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello, World!");
        assert!(
            !result.is_encoding_problem,
            "missing padding must not be an error"
        );
    }

    #[test]
    fn test_base64_invalid_chars_sets_encoding_problem() {
        // Truly invalid base64 (characters outside the alphabet) must set
        // is_encoding_problem and return an empty value.
        let b64_bad = b"!!!not-base64!!!";
        let (raw, part) = make_part(b64_bad, TransferEncoding::Base64, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert!(result.is_encoding_problem);
        assert!(result.value.is_empty());
    }

    #[test]
    fn test_quoted_printable_body() {
        // Oracle: QP encoding of "café" in UTF-8 is "caf=C3=A9"
        let qp = b"caf=C3=A9";
        let (raw, part) = make_part(qp, TransferEncoding::QuotedPrintable, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "caf\u{e9}"); // "café"
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_latin1_charset() {
        // Oracle: latin-1 byte 0xE9 is 'é' (U+00E9)
        let latin1 = b"\xe9";
        let (raw, part) = make_part(latin1, TransferEncoding::Identity, Some("iso-8859-1"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "\u{e9}"); // "é"
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_max_bytes_truncation() {
        // Body = "Hello, World!" (13 bytes), max_bytes = 5 → "Hello"
        let body = b"Hello, World!";
        let (raw, part) = make_part(body, TransferEncoding::Identity, Some("utf-8"));
        let result = decode_body_value(&raw, &part, Some(5)).unwrap();
        assert_eq!(result.value, "Hello");
        assert!(result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_base64_is_truncated_multiple_of_3() {
        // Oracle: base64("Hello, World!") == "SGVsbG8sIFdvcmxkIQ=="
        // "Hello, World!" is 13 bytes. For max_bytes that are multiples of 3
        // AND less than 13, the pre-truncated input decodes to exactly
        // max_bytes bytes, so truncation can only be detected from the
        // leftover input — is_truncated must still be true.
        // For max_bytes = 13 (exact body length), is_truncated must be false.
        let b64 = b"SGVsbG8sIFdvcmxkIQ==";
        let (raw, part) = make_part(b64, TransferEncoding::Base64, Some("utf-8"));
        for n in [3usize, 6, 9] {
            let result = decode_body_value(&raw, &part, Some(n)).unwrap();
            assert!(
                result.is_truncated,
                "max_bytes={n} (multiple of 3) on 13-byte body: is_truncated must be true"
            );
        }
        // max_bytes = 13: exact body length — NOT truncated
        let result = decode_body_value(&raw, &part, Some(13)).unwrap();
        assert!(
            !result.is_truncated,
            "max_bytes=13 (exact body length): is_truncated must be false"
        );
    }

    #[test]
    fn test_base64_max_bytes_non_multiple_of_4() {
        // Oracle: base64("Hello, World!") == "SGVsbG8sIFdvcmxkIQ=="
        // "Hello, World!" is 13 bytes. For each max_bytes from 1..=10 the
        // pre-truncation of the base64 input must land on a multiple of 4 so
        // the decoder is never handed a partial 4-character group, which it
        // would reject with a spurious error.
        let b64 = b"SGVsbG8sIFdvcmxkIQ==";
        let (raw, part) = make_part(b64, TransferEncoding::Base64, Some("utf-8"));
        for n in 1usize..=10 {
            let result = decode_body_value(&raw, &part, Some(n)).unwrap();
            assert!(
                !result.is_encoding_problem,
                "max_bytes={n}: unexpected encoding problem (base64 pre-truncation not a multiple of 4?)"
            );
            assert!(
                !result.value.is_empty(),
                "max_bytes={n}: expected non-empty result"
            );
        }
    }

    // -----------------------------------------------------------------------
    // Quoted-Printable: soft-line-break false-truncation guard
    //
    // QP soft line breaks (=\r\n) decode to 0 bytes. A body consisting of
    // N literal bytes followed by many soft line breaks can be pre-truncated
    // by the 4× heuristic while the decoded payload still fits in max_bytes.
    // The false-truncation guard detects this and re-decodes the full body
    // before setting is_truncated.
    // -----------------------------------------------------------------------

    /// QP body: one literal ASCII byte followed by many soft line-breaks.
    ///
    /// Body: "a" + 20 soft line-breaks ("=\r\n" × 20 = 60 bytes) = 61 encoded
    /// bytes. Full decode: "a" (1 byte).
    ///
    /// With max_bytes = 2 the 4× pre-truncation keeps only the first 8 encoded
    /// bytes, so the input is cut short even though the full body decodes to a
    /// single byte, which fits in max_bytes. The guard must notice that the
    /// preview does not exceed max_bytes, re-decode the whole body, and report
    /// is_truncated = FALSE.
    ///
    /// Without the false-truncation guard, input_was_limited would stay true
    /// and is_truncated would be returned as true, which is incorrect.
    #[test]
    fn test_qp_soft_linebreak_no_false_truncation() {
        let mut body = b"a".to_vec();
        for _ in 0..20 {
            body.extend_from_slice(b"=\r\n");
        }
        let (raw, part) = make_part(&body, TransferEncoding::QuotedPrintable, Some("utf-8"));
        let result = decode_body_value(&raw, &part, Some(2)).unwrap();
        assert_eq!(result.value, "a", "decoded value must be 'a'");
        assert!(
            !result.is_truncated,
            "is_truncated must be false: full body decodes to 1 byte, which fits in max_bytes=2"
        );
        assert!(!result.is_encoding_problem);
    }

    /// QP body that decodes to *exactly* max_bytes bytes while the encoded
    /// input is cut short by the pre-truncation.
    ///
    /// This is the off-by-one case: `decoded_preview.len() == max_bytes`.
    /// A strict `< n` guard would not fire here, so Step 2 would see
    /// `decoded.len() == n`, fall into the `_` branch, and return
    /// `is_truncated = input_was_limited = true` — which is wrong.
    ///
    /// Body: "ab" (2 literal bytes) + 20 soft line-breaks (=\r\n × 20 = 60 bytes)
    /// = 62 total encoded bytes. Full decode: "ab" (2 bytes).
    /// max_bytes = 2: 4× limit = 8 bytes; body > 8 → the input is cut short.
    /// The preview window is the first 8 bytes, "ab=\r\n=\r\n", which carries
    /// the same two payload bytes as the full body.
    /// decoded_preview.len() == max_bytes == 2 → guard MUST fire and re-decode.
    /// is_truncated must be FALSE.
    #[test]
    fn test_qp_exact_max_bytes_no_false_truncation() {
        let mut body = b"ab".to_vec();
        for _ in 0..20 {
            body.extend_from_slice(b"=\r\n");
        }
        let (raw, part) = make_part(&body, TransferEncoding::QuotedPrintable, Some("utf-8"));
        let result = decode_body_value(&raw, &part, Some(2)).unwrap();
        assert_eq!(result.value, "ab", "decoded value must be 'ab'");
        assert!(
            !result.is_truncated,
            "is_truncated must be false: full body decodes to exactly max_bytes=2 bytes"
        );
        assert!(!result.is_encoding_problem);
    }

    /// QP body where the full decode genuinely exceeds max_bytes.
    ///
    /// Ensures the false-truncation guard does not suppress a real truncation.
    /// Body = "=41=42=43" (decodes to "ABC", 3 bytes). max_bytes = 2.
    /// 4× limit = 8 bytes; body is 9 bytes → the preview window is "=41=42=4",
    /// which ends in the middle of an escape.
    ///
    /// How the Robust-mode decoder renders that dangling "=4" is not pinned
    /// here, and it does not matter for the outcome:
    /// - if the preview is longer than 2 bytes, Step 2 truncates it to "AB";
    /// - if it is 2 bytes or fewer, the guard re-decodes the full body to
    ///   "ABC" and Step 2 truncates that to "AB".
    ///
    /// Either way the value is "AB" and is_truncated is true.
    #[test]
    fn test_qp_real_truncation_not_suppressed() {
        // "=41=42=43" = 9 bytes; decodes to "ABC" = 3 bytes.
        let body = b"=41=42=43";
        let (raw, part) = make_part(body, TransferEncoding::QuotedPrintable, Some("utf-8"));
        let result = decode_body_value(&raw, &part, Some(2)).unwrap();
        assert_eq!(result.value.as_bytes(), b"AB", "decoded value must be 'AB'");
        assert!(
            result.is_truncated,
            "is_truncated must be true: full body decodes to 3 bytes, exceeds max_bytes=2"
        );
        assert!(!result.is_encoding_problem);
    }

    // -----------------------------------------------------------------------
    // UUencode tests
    //
    // All UU-encoded byte strings are from the Python 3.12 `uu` / `binascii`
    // stdlib modules — the independent oracle. No expected value comes from
    // this crate. Python commands are cited inline.
    //
    // In this encoding a zero-valued sextet is written as a space, so runs of
    // consecutive spaces inside a data line are significant. Where a fixture
    // contains two spaces in a row, the comment next to it says so.
    // -----------------------------------------------------------------------

    #[test]
    fn test_uuencode_hello_world() {
        // Oracle (Python 3.12):
        //   import uu, io
        //   buf = io.BytesIO()
        //   uu.encode(io.BytesIO(b"Hello, World!"), buf, "test.txt", mode=0o644)
        //   print(repr(buf.getvalue()))
        //   → b'begin 644 test.txt\n-2&5L;&\\L(%=O<FQD(0  \n \nend\n'
        //
        // The data line ends in TWO spaces: the final group "!\0\0" encodes
        // to "(0" followed by two zero sextets.
        //
        // Expected decoded bytes (hex 48 65 6c 6c 6f 2c 20 57 6f 72 6c 64 21):
        // "Hello, World!" in ASCII.
        let uu_body = b"begin 644 test.txt\n-2&5L;&\\L(%=O<FQD(0  \n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello, World!");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_hello() {
        // Oracle (Python 3.12):
        //   import uu, io
        //   buf = io.BytesIO()
        //   uu.encode(io.BytesIO(b"Hello"), buf, "hello.txt", mode=0o644)
        //   print(repr(buf.getvalue()))
        //   → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
        //
        // The data line ends in ONE space (the final group "lo\0" has a
        // single zero sextet).
        //
        // Expected decoded bytes (hex 48 65 6c 6c 6f): "Hello".
        let uu_body = b"begin 644 hello.txt\n%2&5L;&\\ \n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_empty() {
        // Oracle (Python 3.12):
        //   import uu, io
        //   buf = io.BytesIO()
        //   uu.encode(io.BytesIO(b""), buf, "empty.txt", mode=0o644)
        //   print(repr(buf.getvalue()))
        //   → b'begin 644 empty.txt\n \nend\n'
        //
        // A single space line means 0 decoded bytes (end marker).
        let uu_body = b"begin 644 empty.txt\n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_crlf_line_endings() {
        // Same data as test_uuencode_hello_world but with CRLF line endings.
        // UU is commonly CRLF-terminated in email. Only CR/LF is stripped;
        // trailing spaces (= encoding padding) must be preserved — the data
        // line still ends in TWO spaces before the CRLF.
        // Oracle: same expected bytes as the LF-only version.
        let uu_body = b"begin 644 test.txt\r\n-2&5L;&\\L(%=O<FQD(0  \r\n \r\nend\r\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello, World!");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_content_before_begin_skipped() {
        // Content before the "begin NNN filename" line must be silently skipped.
        // Oracle: same expected bytes as test_uuencode_hello_world (data line
        // ends in TWO spaces).
        let uu_body =
            b"Some MIME preamble\r\nMore garbage\r\nbegin 644 test.txt\n-2&5L;&\\L(%=O<FQD(0  \n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value, "Hello, World!");
        assert!(!result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_max_bytes_truncation() {
        // Oracle: UU-encoded "Hello, World!" → 13 decoded bytes (data line
        // ends in TWO spaces).
        // max_bytes = 5 should yield "Hello" and set is_truncated.
        let uu_body = b"begin 644 test.txt\n-2&5L;&\\L(%=O<FQD(0  \n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, Some("utf-8"));
        let result = decode_body_value(&raw, &part, Some(5)).unwrap();
        assert_eq!(result.value, "Hello");
        assert!(result.is_truncated);
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_no_begin_line_is_encoding_problem() {
        // A body with no "begin" line is malformed.
        // Expected: is_encoding_problem set (the value is not asserted here).
        let uu_body = b"this has no begin line\njust garbage\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, None);
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert!(result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_null_byte_in_encoded_payload_no_panic() {
        // Regression test for MIME-gcz.1: a 0x00 byte in the encoded payload
        // must not panic.
        //
        // The data line declares 3 bytes ('#') but its payload is four 0x00
        // bytes, which are outside the printable range UU data uses.
        // NOTE(review): the decoder presumably rejects these bytes and the
        // part ends up flagged as an encoding problem — that lives in the
        // `uuencoding` crate and is deliberately not asserted here.
        //
        // The key invariant is no panic regardless of the error path taken.
        let uu_body = b"begin 644 f\n#\x00\x00\x00\x00\n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, None);
        let _result = decode_body_value(&raw, &part, None).unwrap();
    }

    #[test]
    fn test_uuencode_backtick_end_marker() {
        // A backtick-only line is an alternative end marker (used by some mailers).
        // Oracle (Python 3.12):
        //   import binascii
        //   print(repr(binascii.b2a_uu(b"Hi")))
        //   → b'"2&D \n'
        //
        // Replace the standard space end-marker with a backtick; decoder must stop.
        // Expected decoded bytes: b"Hi" (0x48 0x69).
        let uu_body = b"begin 644 hi.txt\n\"2&D \n`\nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, None);
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert_eq!(result.value.as_bytes(), b"Hi");
        assert!(!result.is_encoding_problem);
    }

    #[test]
    fn test_uuencode_full_45_byte_line() {
        // Oracle (Python 3.12):
        //   import binascii
        //   print(repr(binascii.b2a_uu(bytes(range(45)))))
        //   → b'M  $" P0%!@<("0H+# T.#Q 1$A,4%187&!D:&QP=\'A\\@(2(C)"4F)R@I*BLL\n'
        //
        // 'M' = 77, (77-32)&63 = 45 bytes per line.
        // There are TWO spaces right after 'M': the first group 00 01 02
        // starts with two zero sextets. Every other space in the line is single.
        // Decoded: bytes 0x00..0x2C (0 through 44).
        let uu_body =
            b"begin 644 test.bin\nM  $\" P0%!@<(\"0H+# T.#Q 1$A,4%187&!D:&QP=\'A\\@(2(C)\"4F)R@I*BLL\n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, None);
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert!(!result.is_encoding_problem, "unexpected encoding problem");
        let decoded = result.value.as_bytes();
        assert_eq!(decoded.len(), 45, "expected 45 decoded bytes");
        for (i, &b) in decoded.iter().enumerate() {
            assert_eq!(
                b, i as u8,
                "decoded[{i}] = {b:#04x}, expected {:#04x}",
                i as u8
            );
        }
    }

    #[test]
    fn test_uuencode_two_line_decode() {
        // Oracle (Python 3.12):
        //   import binascii
        //   print(repr(binascii.b2a_uu(bytes(range(45)))))
        //   → b'M  $" P0%!@<("0H+# T.#Q 1$A,4%187&!D:&QP=\'A\\@(2(C)"4F)R@I*BLL\n'
        //   print(repr(binascii.b2a_uu(bytes(range(45, 48)))))
        //   → b'#+2XO\n'
        //
        // Two-line decode: bytes 0..47. As above, the first data line has TWO
        // spaces right after 'M'. The `\` line continuations skip only the
        // source indentation in front of 'M' and '#', not those spaces.
        let uu_body = b"begin 644 test48.bin\n\
            M  $\" P0%!@<(\"0H+# T.#Q 1$A,4%187&!D:&QP=\'A\\@(2(C)\"4F)R@I*BLL\n\
            #+2XO\n \nend\n";
        let (raw, part) = make_part(uu_body, TransferEncoding::UUEncode, None);
        let result = decode_body_value(&raw, &part, None).unwrap();
        assert!(!result.is_encoding_problem, "unexpected encoding problem");
        let decoded = result.value.as_bytes();
        assert_eq!(decoded.len(), 48, "expected 48 decoded bytes");
        for (i, &b) in decoded.iter().enumerate() {
            assert_eq!(
                b, i as u8,
                "decoded[{i}] = {b:#04x}, expected {:#04x}",
                i as u8
            );
        }
    }
}