mime_tree/uuencode.rs
1//! Inline UUencode scanner for MIME body parts.
2//!
3//! # What is inline UUencode?
4//!
5//! UUencode (Unix-to-Unix encoding) predates MIME by over a decade. Before
6//! MIME standardised `Content-Transfer-Encoding` in 1992, UUencode was the
7//! dominant way to send binary attachments over 7-bit text networks (Usenet,
8//! early SMTP). A UU block looks like:
9//!
10//! ```text
11//! begin 644 filename.bin
12//! M<encoded data lines>
13//! `
14//! end
15//! ```
16//!
17//! # Why this appears in practice
18//!
19//! Many mail archives and mailing-list digests from the 1990s and early 2000s
20//! contain messages where binary files were embedded as literal UU blocks
21//! inside `text/plain` bodies — no `Content-Transfer-Encoding` header, no
22//! MIME multipart wrapper. Modern mail clients also sometimes produce hybrid
23//! messages: a MIME-structured outer shell with an inner `text/plain` part
24//! that still contains legacy inline UU attachments.
25//!
26//! # This module vs. `parse()` / `decode_body_value()`
27//!
28//! [`parse()`][crate::parse] and [`decode_body_value()`][crate::decode_body_value]
29//! handle the RFC 2045 `Content-Transfer-Encoding: x-uuencode` case — a part
30//! whose *entire body* is one UU-encoded blob declared via a MIME header.
31//!
32//! [`scan_inline_uuencode()`] is completely separate and opt-in. It operates
33//! on the raw bytes of a part's body (typically a `text/plain` part) and
34//! searches for one or more `begin … end` UU blocks embedded anywhere within
35//! the body text. It does **not** call `parse()` or `decode_body_value()`
36//! internally, and it does not modify the [`ParsedPart`][crate::ParsedPart] tree.
37//!
38//! Callers decide when to invoke this scanner. A reasonable heuristic is to
39//! call it on any `text/plain` leaf part whose decoded text contains the
40//! literal string `"begin "`.
41
42use crate::part::ParsedPart;
43
44/// A single UU-encoded binary block found inside a part body.
45///
46/// All byte offsets are **absolute** — they are in the same coordinate space
47/// as `ParsedPart::body_range` and the `raw` buffer passed to
48/// [`scan_inline_uuencode()`].
49#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
50#[non_exhaustive]
51pub struct InlineUUBlock {
52 /// Byte offset of the `begin NNN filename` line within `raw`.
53 ///
54 /// Slicing `raw[begin_offset .. begin_offset + begin_length]` (when
55 /// `begin_length` is `Some`) yields the complete UU block from the
56 /// `begin` line through the `end` line (inclusive).
57 ///
58 /// When [`is_encoding_problem`] is `true`, `begin_length` is `None`.
59 /// `begin_offset` still reflects the actual position of the offending
60 /// `begin` or `begin-base64` line within `raw`, so it can be used for
61 /// diagnostic purposes.
62 pub begin_offset: u32,
63
64 /// Byte length of the entire UU block: from the start of the `begin` line
65 /// through the end of the `end` line (inclusive of its newline).
66 ///
67 /// `None` when [`is_encoding_problem`] is `true`, because the block end
68 /// boundary is not determined for error items (`begin-base64` and
69 /// malformed `begin` lines).
70 pub begin_length: Option<u32>,
71
72 /// File permission mode parsed from the `begin` line, e.g. `0o644`.
73 pub mode: u32,
74
75 /// Filename parsed verbatim from the `begin` line.
76 pub filename: String,
77
78 /// Decoded binary content. Empty if `is_encoding_problem` is true and
79 /// no bytes could be decoded, or if the encoded payload was genuinely
80 /// empty (backtick-only lines).
81 pub data: Vec<u8>,
82
83 /// True if any decoding error was encountered (unknown/malformed line
84 /// length byte, wrong number of encoded characters, missing `end` line,
85 /// or a `begin-base64` block was detected).
86 /// A partial decode may still be present in `data`.
87 pub is_encoding_problem: bool,
88}
89
90/// Scan a MIME part's body for inline UU-encoded blocks.
91///
92/// Slices `raw` using `part.body_range` to obtain the body bytes, then scans
93/// for one or more `begin NNN filename` / `end` UU blocks embedded anywhere
94/// in the body text. Returns one [`InlineUUBlock`] per block found.
95///
96/// Delegates to [`uuencoding::scan()`] for all parsing and decoding, so all
97/// real-world tolerance built into that crate (CRLF line endings, space/backtick
98/// zero-value handling, `begin-base64` detection, data-after-terminator
99/// discarding, etc.) applies automatically.
100///
101/// # Parameters
102///
103/// * `raw` — the full raw message bytes (same buffer you passed to
104/// [`parse()`][crate::parse]).
105/// * `part` — a [`ParsedPart`][crate::ParsedPart] from the parsed tree.
106/// Only `part.body_range` is used to locate the relevant slice of `raw`.
107///
108/// # Return value
109///
110/// An empty `Vec` when:
111/// - the body contains no `begin … end` blocks,
112/// - `part.body_range` is out of bounds for `raw`.
113///
114/// Otherwise, one entry per block found, in the order they appear in the body.
115///
116/// # Notes
117///
118/// * This function does **not** call `decode_body_value()` internally. It
119/// works directly on the raw bytes of the body without any
120/// transfer-encoding decode or charset conversion.
121/// * Byte offsets in the returned [`InlineUUBlock`]s are absolute — they are
122/// relative to the start of `raw`, matching the coordinate space of
123/// `part.body_range`.
124/// * For error items where [`InlineUUBlock::is_encoding_problem`] is `true`,
125/// `begin_offset` is the position of the offending `begin` or `begin-base64`
126/// line within `raw` and `begin_length` is `None`.
127/// * No panic occurs on any input (malformed, truncated, or adversarial).
128///
129/// # Example
130///
131/// ```rust
132/// use mime_tree::{parse, scan_inline_uuencode};
133///
134/// // A text/plain message with an inline UU block.
135/// // Oracle (Python 3.12 `uu` module):
136/// // uu.encode(b"Hello", ...) → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
137/// let raw: &[u8] = b"Content-Type: text/plain\r\n\r\nbegin 644 hello.txt\n%2&5L;&\\ \n \nend\n";
138/// let msg = parse(raw).unwrap();
139/// let part = msg.part_index.find_by_id("1").unwrap();
140///
141/// let blocks = scan_inline_uuencode(raw, part);
142/// assert_eq!(blocks.len(), 1);
143/// assert_eq!(blocks[0].mode, 0o644);
144/// assert_eq!(blocks[0].filename, "hello.txt");
145/// assert_eq!(blocks[0].data, b"Hello");
146/// assert!(!blocks[0].is_encoding_problem);
147/// ```
148#[must_use = "the scanned UU blocks must be used"]
149pub fn scan_inline_uuencode(raw: &[u8], part: &ParsedPart) -> Vec<InlineUUBlock> {
150 let (offset_u32, length_u32) = part.body_range;
151 let offset = offset_u32 as usize;
152 let length = length_u32 as usize;
153
154 // Defensive: body_range out of bounds → empty result, no panic.
155 let end = match offset.checked_add(length) {
156 Some(e) if e <= raw.len() => e,
157 _ => return Vec::new(),
158 };
159 let body = &raw[offset..end];
160
161 uuencoding::scan(body)
162 .into_iter()
163 .map(|result| match result {
164 Ok(block) => {
165 // Convert relative-to-body usize offsets to absolute u32 offsets.
166 let abs_begin = offset_u32
167 .saturating_add(u32::try_from(block.begin_offset).unwrap_or(u32::MAX));
168 let block_len = u32::try_from(block.end_offset.saturating_sub(block.begin_offset))
169 .unwrap_or(u32::MAX);
170 InlineUUBlock {
171 begin_offset: abs_begin,
172 begin_length: Some(block_len),
173 mode: block.metadata.mode,
174 filename: block.metadata.filename,
175 data: block.data,
176 is_encoding_problem: block.is_truncated,
177 }
178 }
179 Err(e) => {
180 // UuError::BeginBase64 or UuError::InvalidBeginLine.
181 // Both variants now carry the byte offset of the offending
182 // begin line within the body slice.
183 let rel_begin = match &e {
184 uuencoding::UuError::BeginBase64 { begin_offset } => *begin_offset,
185 uuencoding::UuError::InvalidBeginLine { begin_offset, .. } => *begin_offset,
186 // InvalidChar is never emitted by scan() — it is produced
187 // only during decode_line() and is absorbed into the
188 // ScannedBlock's is_truncated field. Treat it as offset 0.
189 _ => 0,
190 };
191 let abs_begin =
192 offset_u32.saturating_add(u32::try_from(rel_begin).unwrap_or(u32::MAX));
193 InlineUUBlock {
194 begin_offset: abs_begin,
195 begin_length: None,
196 mode: 0,
197 filename: String::new(),
198 data: Vec::new(),
199 is_encoding_problem: true,
200 }
201 }
202 })
203 .collect()
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use crate::part::{ParsedPart, TransferEncoding};

    /// Shared fixture: two UU blocks separated by a line of prose.
    ///
    /// Oracle (Python 3.12 `uu` module):
    ///   hello = uu.encode(b'Hello', 'hello.txt', 0o644)
    ///     → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'   (36 bytes)
    ///   fox = uu.encode(b'The quick brown fox', 'fox.bin', 0o600)  (54 bytes)
    ///   body = hello + b'Some text in between\n' (21 bytes) + fox  = 111 bytes
    ///   fox offset within body = 36 + 21 = 57
    const TWO_BLOCKS_HEX: &str =
        "626567696e203634342068656c6c6f2e7478740a253226354c3b265c200a200a656e64\
         0a536f6d65207465787420696e206265747765656e0a626567696e2036303020666f78\
         2e62696e0a3335264145282725553a362d4b282629523b573d4e2826394f3e2020200a\
         200a656e640a";

    /// Build a `text/plain` leaf `ParsedPart` with the given ranges and
    /// otherwise neutral metadata. Only `body_range` is read by the scanner.
    fn part_with_ranges(header_range: (u32, u32), body_range: (u32, u32)) -> ParsedPart {
        ParsedPart {
            part_id: "1".to_owned(),
            content_type: "text/plain".to_owned(),
            charset: None,
            transfer_encoding: TransferEncoding::Identity,
            disposition: None,
            filename: None,
            cid: None,
            header_range,
            body_range,
            children: vec![],
            is_encoding_problem: false,
        }
    }

    /// Build a synthetic raw buffer: `prefix || body_bytes`, returning the
    /// buffer and a `ParsedPart` whose `body_range` points at `body_bytes`.
    fn make_part(prefix: &[u8], body_bytes: &[u8]) -> (Vec<u8>, ParsedPart) {
        // Checked conversions: a fixture too large for u32 is a test bug and
        // should fail loudly instead of silently truncating.
        let body_offset = u32::try_from(prefix.len()).expect("test prefix must fit in u32");
        let body_len = u32::try_from(body_bytes.len()).expect("test body must fit in u32");

        let raw = [prefix, body_bytes].concat();
        let part = ParsedPart {
            charset: Some("utf-8".to_owned()),
            ..part_with_ranges((0, body_offset), (body_offset, body_len))
        };
        (raw, part)
    }

    /// Slice `raw` with a block's absolute `begin_offset` / `begin_length`.
    ///
    /// Panics if the block has no length or the range is out of bounds, which
    /// is exactly what the calling tests want to detect.
    fn block_bytes<'a>(raw: &'a [u8], block: &InlineUUBlock) -> &'a [u8] {
        let start = block.begin_offset as usize;
        let len = block
            .begin_length
            .expect("block must have a known length") as usize;
        &raw[start..start + len]
    }

    // -----------------------------------------------------------------------
    // TV1: single block, "Hello"
    // Oracle (Python 3.12 `uu` module):
    //   uu.encode(io.BytesIO(b'Hello'), buf, 'hello.txt', 0o644)
    //   → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
    // -----------------------------------------------------------------------
    #[test]
    fn test_single_block_hello() {
        // body hex: begin 644 hello.txt\n%2&5L;&\\ \n \nend\n
        let body =
            hex_bytes("626567696e203634342068656c6c6f2e7478740a253226354c3b265c200a200a656e640a");
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1, "expected 1 block");

        let b = &blocks[0];
        assert_eq!(b.mode, 0o644);
        assert_eq!(b.filename, "hello.txt");
        // expected decoded: 48656c6c6f = "Hello"
        assert_eq!(b.data, hex_bytes("48656c6c6f"));
        assert!(!b.is_encoding_problem);
        // begin_offset = 0 (no prefix), begin_length = Some(body.len()) = Some(36)
        assert_eq!(b.begin_offset, 0);
        assert_eq!(b.begin_length, Some(u32::try_from(body.len()).unwrap()));
        // Verify by slicing raw
        assert_eq!(block_bytes(&raw, b), body.as_slice());
    }

    // -----------------------------------------------------------------------
    // TV2: two blocks with interleaved text (see TWO_BLOCKS_HEX for the oracle)
    // -----------------------------------------------------------------------
    #[test]
    fn test_two_blocks() {
        let body = hex_bytes(TWO_BLOCKS_HEX);
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 2, "expected 2 blocks");

        let b0 = &blocks[0];
        assert_eq!(b0.mode, 0o644);
        assert_eq!(b0.filename, "hello.txt");
        assert_eq!(b0.data, hex_bytes("48656c6c6f")); // "Hello"
        assert!(!b0.is_encoding_problem);
        assert_eq!(b0.begin_offset, 0);
        assert_eq!(b0.begin_length, Some(36)); // 36-byte block with terminator

        let b1 = &blocks[1];
        assert_eq!(b1.mode, 0o600);
        assert_eq!(b1.filename, "fox.bin");
        assert_eq!(
            b1.data,
            hex_bytes("54686520717569636b2062726f776e20666f78") // "The quick brown fox"
        );
        assert!(!b1.is_encoding_problem);
        // block2 starts at offset 57 (36 + len("Some text in between\n") = 36+21=57)
        assert_eq!(b1.begin_offset, 57);
        assert_eq!(b1.begin_length, Some(54)); // 54-byte fox block with terminator

        // Verify slices
        let s0 = block_bytes(&raw, b0);
        let s1 = block_bytes(&raw, b1);
        // s0 should start with "begin 644 hello.txt\n"
        assert!(s0.starts_with(b"begin 644 hello.txt\n"));
        assert!(s0.ends_with(b"end\n"));
        // s1 should start with "begin 600 fox.bin\n"
        assert!(s1.starts_with(b"begin 600 fox.bin\n"));
        assert!(s1.ends_with(b"end\n"));
    }

    // -----------------------------------------------------------------------
    // TV2b: two blocks, absolute offsets with non-zero body_range
    // -----------------------------------------------------------------------
    #[test]
    fn test_two_blocks_with_prefix_offset() {
        let body = hex_bytes(TWO_BLOCKS_HEX);
        let prefix = b"Content-Type: text/plain\r\n\r\n"; // 28 bytes
        let (raw, part) = make_part(prefix, &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 2);

        // Absolute offsets = prefix_len + relative_offset
        assert_eq!(blocks[0].begin_offset, 28);
        assert_eq!(blocks[1].begin_offset, 28 + 57); // fox starts at 57 in body

        // Verify by slicing raw with absolute offsets
        for b in &blocks {
            let sliced = block_bytes(&raw, b);
            assert!(sliced.starts_with(b"begin "));
            assert!(sliced.ends_with(b"end\n"));
        }
    }

    // -----------------------------------------------------------------------
    // TV3: missing 'end' line → is_encoding_problem = true
    // -----------------------------------------------------------------------
    #[test]
    fn test_missing_end_line() {
        // body_hex: "begin 644 test.txt\n" + UU line for Hello, no "end\n"
        let body = hex_bytes("626567696e2036343420746573742e7478740a253226354c3b265c200a");
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1, "block still found even without end");
        assert!(
            blocks[0].is_encoding_problem,
            "missing end must set is_encoding_problem"
        );
    }

    // -----------------------------------------------------------------------
    // TV4: 45 bytes decoded from one full UU line (all bytes 0x00..0x2c)
    // Oracle (Python 3.12 `uu` module):
    //   uu.encode(io.BytesIO(bytes(range(45))), buf, 'allbytes.bin', 0o644)
    //   → b'begin 644 allbytes.bin\nM $" P0%!@...Ll\n \nend\n'
    // -----------------------------------------------------------------------
    #[test]
    fn test_full_line_45_bytes() {
        // body_hex from oracle output (includes ' \n' terminator before end)
        let body = hex_bytes(
            "626567696e2036343420616c6c62797465732e62696e0a4d202024222050302521\
             403c282230482b2320542e2351203124412c34253138372621443a2651503d27\
             415c402832284329223446295240492a424c4c0a200a656e640a",
        );
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].mode, 0o644);
        assert_eq!(blocks[0].filename, "allbytes.bin");
        assert_eq!(
            blocks[0].data,
            hex_bytes("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c")
        );
        assert!(!blocks[0].is_encoding_problem);
    }

    // -----------------------------------------------------------------------
    // TV5: backtick-terminated empty block (empty data)
    // -----------------------------------------------------------------------
    #[test]
    fn test_backtick_empty_block() {
        // body_hex: "begin 755 empty.bin\n`\nend\n"
        let body = hex_bytes("626567696e2037353520656d7074792e62696e0a600a656e640a");
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].mode, 0o755);
        assert_eq!(blocks[0].filename, "empty.bin");
        assert!(blocks[0].data.is_empty(), "expected empty data");
        assert!(!blocks[0].is_encoding_problem);
    }

    // -----------------------------------------------------------------------
    // TV6: multi-line block
    // Oracle (Python 3.12 `uu` module):
    //   data = b'Hello, World! This is a test of multi-line UU encoding. Adding more bytes.'
    //   uu.encode(io.BytesIO(data), buf, 'multiline.txt', 0o644)
    //   → b'begin 644 multiline.txt\nM2&5L;&\\...\n=...\n \nend\n'
    // -----------------------------------------------------------------------
    #[test]
    fn test_multiline_block() {
        // Oracle hex (Python 3.12, includes ' \n' terminator before end)
        let body = hex_bytes(
            "626567696e20363434206d756c74696c696e652e7478740a4d3226354c3b265c4c\
             28253d4f3c465144283221343a26455328264553282624403d2635533d22214f39\
             42214d3d3651543a32554c3a365945282535350a3d2826354e38565d443a365947\
             2b422121392631493b463c403b365d52393221423e3731453c5258200a200a656e\
             640a",
        );
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].mode, 0o644);
        assert_eq!(blocks[0].filename, "multiline.txt");
        // Oracle decoded bytes: "Hello, World! This is a test of multi-line UU encoding. Adding more bytes."
        assert_eq!(
            blocks[0].data,
            hex_bytes("48656c6c6f2c20576f726c6421205468697320697320612074657374206f66206d756c74692d6c696e6520555520656e636f64696e672e20416464696e67206d6f72652062797465732e")
        );
        assert!(!blocks[0].is_encoding_problem);
    }

    // -----------------------------------------------------------------------
    // No UU blocks → empty Vec
    // -----------------------------------------------------------------------
    #[test]
    fn test_no_uu_blocks() {
        let body = b"This is just plain text.\nNo UU blocks here.\n";
        let (raw, part) = make_part(b"", body);
        let blocks = scan_inline_uuencode(&raw, &part);
        assert!(blocks.is_empty());
    }

    // -----------------------------------------------------------------------
    // Out-of-bounds body_range → empty Vec
    // -----------------------------------------------------------------------
    #[test]
    fn test_out_of_bounds_body_range() {
        let raw = b"short";
        // end = 3 + 100 = 103, beyond raw.len() = 5
        let part = part_with_ranges((0, 0), (3, 100));
        let blocks = scan_inline_uuencode(raw, &part);
        assert!(
            blocks.is_empty(),
            "out-of-bounds body_range must return empty Vec"
        );
    }

    // -----------------------------------------------------------------------
    // Overflow-safe body_range: offset + length exceeds u32::MAX.
    // On 32-bit targets the usize addition itself overflows; on 64-bit
    // targets the sum is representable but far beyond raw.len(). Either way
    // the scanner must return an empty Vec without panicking.
    // -----------------------------------------------------------------------
    #[test]
    fn test_overflow_safe_body_range() {
        let raw = b"data";
        let part = part_with_ranges((0, 0), (u32::MAX, 1));
        let blocks = scan_inline_uuencode(raw, &part);
        assert!(
            blocks.is_empty(),
            "overflowing body_range must return empty Vec"
        );
    }

    // -----------------------------------------------------------------------
    // begin-base64 block is reported with is_encoding_problem=true and
    // begin_offset set to the actual position of the begin-base64 line.
    // -----------------------------------------------------------------------
    #[test]
    fn test_begin_base64_is_encoding_problem() {
        // A begin-base64 block followed by a normal UU block.
        // The begin-base64 generates an Err item; the UU block is decoded normally.
        // Oracle: uu.encode(b'Hello', ...) → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
        let b64_block = b"begin-base64 644 file.txt\naGVsbG8=\n====\n";
        let uu_block = b"begin 644 hello.txt\n%2&5L;&\\ \n \nend\n";
        let mut body = Vec::new();
        body.extend_from_slice(b64_block);
        body.extend_from_slice(uu_block);
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        // Two items: one Err (begin-base64) → is_encoding_problem, one Ok (UU block).
        assert_eq!(blocks.len(), 2, "expected 2 items");
        assert!(
            blocks[0].is_encoding_problem,
            "begin-base64 block must have is_encoding_problem=true"
        );
        // begin_offset must be 0 (begin-base64 is at start of body, no prefix)
        assert_eq!(
            blocks[0].begin_offset, 0,
            "begin-base64 at body start must have begin_offset=0"
        );
        assert!(
            !blocks[1].is_encoding_problem,
            "valid UU block must not have is_encoding_problem"
        );
        assert_eq!(blocks[1].data, b"Hello");
    }

    // -----------------------------------------------------------------------
    // TV-b64-solo: body contains only a begin-base64 block, no UU block follows.
    //
    // scan_inline_uuencode must return exactly one item with is_encoding_problem=true.
    // This is test_begin_base64_is_encoding_problem stripped of the trailing
    // valid UU block, to confirm the scanner does not drop the error item or
    // return an empty Vec when nothing follows the begin-base64 block.
    // -----------------------------------------------------------------------
    #[test]
    fn test_begin_base64_only_block() {
        let body = b"begin-base64 644 file.gif\nSGVsbG8=\n====\n";
        let (raw, part) = make_part(b"", body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(
            blocks.len(),
            1,
            "expected exactly 1 item for a solo begin-base64 block"
        );
        assert!(
            blocks[0].is_encoding_problem,
            "begin-base64 block must have is_encoding_problem=true"
        );
    }

    // -----------------------------------------------------------------------
    // begin-base64 with prefix: begin_offset reflects actual position
    // -----------------------------------------------------------------------
    #[test]
    fn test_begin_base64_offset_with_prefix() {
        // Prose before the begin-base64 block: begin_offset must not be 0.
        let prefix = b"Some prose before.\n"; // 19 bytes
        let b64_block = b"begin-base64 644 file.txt\naGVsbG8=\n====\n";
        let mut body = Vec::new();
        body.extend_from_slice(prefix);
        body.extend_from_slice(b64_block);
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_uuencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_encoding_problem);
        assert_eq!(
            blocks[0].begin_offset,
            u32::try_from(prefix.len()).unwrap(),
            "begin_offset must equal the prefix length"
        );
    }

    // -----------------------------------------------------------------------
    // Helper: decode a hex string to bytes.
    // -----------------------------------------------------------------------

    /// Decode a hex fixture into bytes, ignoring any whitespace so that
    /// multi-line hex literals can be used in tests.
    ///
    /// # Panics
    ///
    /// Panics with a descriptive message if the input contains a character
    /// that is not a hex digit (a sign prefix such as `+` is rejected too),
    /// or if the number of hex digits is odd. A malformed fixture should fail
    /// loudly rather than decode to something unintended.
    fn hex_bytes(s: &str) -> Vec<u8> {
        let nibbles: Vec<u8> = s
            .chars()
            .filter(|c| !c.is_whitespace())
            .map(|c| match c.to_digit(16) {
                // to_digit(16) yields 0..=15, so the narrowing is lossless.
                Some(d) => d as u8,
                None => panic!("hex_bytes: invalid hex digit {:?}", c),
            })
            .collect();
        assert!(
            nibbles.len() % 2 == 0,
            "hex_bytes: odd number of hex digits ({})",
            nibbles.len()
        );
        nibbles
            .chunks_exact(2)
            .map(|pair| (pair[0] << 4) | pair[1])
            .collect()
    }
}