mime_tree/
uuencode.rs

1//! Inline UUencode scanner for MIME body parts.
2//!
3//! # What is inline UUencode?
4//!
5//! UUencode (Unix-to-Unix encoding) predates MIME by over a decade.  Before
6//! MIME standardised `Content-Transfer-Encoding` in 1992, UUencode was the
7//! dominant way to send binary attachments over 7-bit text networks (Usenet,
8//! early SMTP).  A UU block looks like:
9//!
10//! ```text
11//! begin 644 filename.bin
12//! M<encoded data lines>
13//! `
14//! end
15//! ```
16//!
17//! # Why this appears in practice
18//!
19//! Many mail archives and mailing-list digests from the 1990s and early 2000s
20//! contain messages where binary files were embedded as literal UU blocks
21//! inside `text/plain` bodies — no `Content-Transfer-Encoding` header, no
22//! MIME multipart wrapper.  Modern mail clients also sometimes produce hybrid
23//! messages: a MIME-structured outer shell with an inner `text/plain` part
24//! that still contains legacy inline UU attachments.
25//!
26//! # This module vs. `parse()` / `decode_body_value()`
27//!
28//! [`parse()`][crate::parse] and [`decode_body_value()`][crate::decode_body_value]
29//! handle the RFC 2045 `Content-Transfer-Encoding: x-uuencode` case — a part
30//! whose *entire body* is one UU-encoded blob declared via a MIME header.
31//!
32//! [`scan_inline_uuencode()`] is completely separate and opt-in.  It operates
33//! on the raw bytes of a part's body (typically a `text/plain` part) and
34//! searches for one or more `begin … end` UU blocks embedded anywhere within
35//! the body text.  It does **not** call `parse()` or `decode_body_value()`
36//! internally, and it does not modify the [`ParsedPart`][crate::ParsedPart] tree.
37//!
38//! Callers decide when to invoke this scanner.  A reasonable heuristic is to
39//! call it on any `text/plain` leaf part whose decoded text contains the
40//! literal string `"begin "`.
41
42use crate::part::ParsedPart;
43
/// A single UU-encoded binary block found inside a part body.
///
/// Values of this type are produced by [`scan_inline_uuencode()`], one per
/// item reported by the underlying scanner.  Two kinds of item exist:
///
/// * **decoded blocks** — the scanner returned a block; `begin_length` is
///   `Some`, and `mode`, `filename` and `data` carry what the scanner parsed
///   and decoded;
/// * **error items** — the scanner returned an error (`begin-base64` or a
///   malformed `begin` line); `begin_length` is `None`, `mode` is `0`, and
///   `filename` and `data` are empty.
///
/// All byte offsets are **absolute** — they are in the same coordinate space
/// as `ParsedPart::body_range` and the `raw` buffer passed to
/// [`scan_inline_uuencode()`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct InlineUUBlock {
    /// Byte offset of the `begin NNN filename` line within `raw`.
    ///
    /// Computed as the start of the part body plus the scanner's offset
    /// within the body, saturating at `u32::MAX`.
    ///
    /// Slicing `raw[begin_offset .. begin_offset + begin_length]` (when
    /// `begin_length` is `Some`) yields the complete UU block from the
    /// `begin` line through the `end` line (inclusive).
    ///
    /// For error items (`begin_length` is `None`), `begin_offset` still
    /// reflects the position of the offending `begin` or `begin-base64` line
    /// within `raw`, so it can be used for diagnostic purposes.  If the
    /// scanner error carries no offset, the start of the body is reported.
    pub begin_offset: u32,

    /// Byte length of the entire UU block: from the start of the `begin` line
    /// through the end of the `end` line (inclusive of its newline).
    ///
    /// `None` only for error items (`begin-base64` and malformed `begin`
    /// lines), because no block end boundary is determined for them.
    ///
    /// A decoded block keeps `Some(length)` even when
    /// [`InlineUUBlock::is_encoding_problem`] is `true` (e.g. the scanner
    /// flagged the block as truncated); the length then covers the span the
    /// scanner reported for that block.
    /// NOTE(review): confirm what span the scanner reports when the `end`
    /// line is missing.
    pub begin_length: Option<u32>,

    /// File permission mode parsed from the `begin` line, e.g. `0o644`.
    ///
    /// `0` for error items.
    pub mode: u32,

    /// Filename parsed verbatim from the `begin` line.
    ///
    /// Empty for error items.
    pub filename: String,

    /// Decoded binary content.  Empty for error items, or if the encoded
    /// payload was genuinely empty (backtick-only lines).  May hold a partial
    /// decode when `is_encoding_problem` is `true` on a decoded block.
    pub data: Vec<u8>,

    /// True if any decoding error was encountered (unknown/malformed line
    /// length byte, wrong number of encoded characters, missing `end` line,
    /// or a `begin-base64` block was detected).
    ///
    /// Always `true` for error items; for decoded blocks it mirrors the
    /// scanner's truncation flag.  This flag alone does **not** imply that
    /// `begin_length` is `None`.
    pub is_encoding_problem: bool,
}
89
90/// Scan a MIME part's body for inline UU-encoded blocks.
91///
92/// Slices `raw` using `part.body_range` to obtain the body bytes, then scans
93/// for one or more `begin NNN filename` / `end` UU blocks embedded anywhere
94/// in the body text.  Returns one [`InlineUUBlock`] per block found.
95///
96/// Delegates to [`uuencoding::scan()`] for all parsing and decoding, so all
97/// real-world tolerance built into that crate (CRLF line endings, space/backtick
98/// zero-value handling, `begin-base64` detection, data-after-terminator
99/// discarding, etc.) applies automatically.
100///
101/// # Parameters
102///
103/// * `raw`  — the full raw message bytes (same buffer you passed to
104///   [`parse()`][crate::parse]).
105/// * `part` — a [`ParsedPart`][crate::ParsedPart] from the parsed tree.
106///   Only `part.body_range` is used to locate the relevant slice of `raw`.
107///
108/// # Return value
109///
110/// An empty `Vec` when:
111/// - the body contains no `begin … end` blocks,
112/// - `part.body_range` is out of bounds for `raw`.
113///
114/// Otherwise, one entry per block found, in the order they appear in the body.
115///
116/// # Notes
117///
118/// * This function does **not** call `decode_body_value()` internally.  It
119///   works directly on the raw bytes of the body without any
120///   transfer-encoding decode or charset conversion.
121/// * Byte offsets in the returned [`InlineUUBlock`]s are absolute — they are
122///   relative to the start of `raw`, matching the coordinate space of
123///   `part.body_range`.
124/// * For error items where [`InlineUUBlock::is_encoding_problem`] is `true`,
125///   `begin_offset` is the position of the offending `begin` or `begin-base64`
126///   line within `raw` and `begin_length` is `None`.
127/// * No panic occurs on any input (malformed, truncated, or adversarial).
128///
129/// # Example
130///
131/// ```rust
132/// use mime_tree::{parse, scan_inline_uuencode};
133///
134/// // A text/plain message with an inline UU block.
135/// // Oracle (Python 3.12 `uu` module):
136/// //   uu.encode(b"Hello", ...) → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
137/// let raw: &[u8] = b"Content-Type: text/plain\r\n\r\nbegin 644 hello.txt\n%2&5L;&\\ \n \nend\n";
138/// let msg = parse(raw).unwrap();
139/// let part = msg.part_index.find_by_id("1").unwrap();
140///
141/// let blocks = scan_inline_uuencode(raw, part);
142/// assert_eq!(blocks.len(), 1);
143/// assert_eq!(blocks[0].mode, 0o644);
144/// assert_eq!(blocks[0].filename, "hello.txt");
145/// assert_eq!(blocks[0].data, b"Hello");
146/// assert!(!blocks[0].is_encoding_problem);
147/// ```
148#[must_use = "the scanned UU blocks must be used"]
149pub fn scan_inline_uuencode(raw: &[u8], part: &ParsedPart) -> Vec<InlineUUBlock> {
150    let (offset_u32, length_u32) = part.body_range;
151    let offset = offset_u32 as usize;
152    let length = length_u32 as usize;
153
154    // Defensive: body_range out of bounds → empty result, no panic.
155    let end = match offset.checked_add(length) {
156        Some(e) if e <= raw.len() => e,
157        _ => return Vec::new(),
158    };
159    let body = &raw[offset..end];
160
161    uuencoding::scan(body)
162        .into_iter()
163        .map(|result| match result {
164            Ok(block) => {
165                // Convert relative-to-body usize offsets to absolute u32 offsets.
166                let abs_begin = offset_u32
167                    .saturating_add(u32::try_from(block.begin_offset).unwrap_or(u32::MAX));
168                let block_len = u32::try_from(block.end_offset.saturating_sub(block.begin_offset))
169                    .unwrap_or(u32::MAX);
170                InlineUUBlock {
171                    begin_offset: abs_begin,
172                    begin_length: Some(block_len),
173                    mode: block.metadata.mode,
174                    filename: block.metadata.filename,
175                    data: block.data,
176                    is_encoding_problem: block.is_truncated,
177                }
178            }
179            Err(e) => {
180                // UuError::BeginBase64 or UuError::InvalidBeginLine.
181                // Both variants now carry the byte offset of the offending
182                // begin line within the body slice.
183                let rel_begin = match &e {
184                    uuencoding::UuError::BeginBase64 { begin_offset } => *begin_offset,
185                    uuencoding::UuError::InvalidBeginLine { begin_offset, .. } => *begin_offset,
186                    // InvalidChar is never emitted by scan() — it is produced
187                    // only during decode_line() and is absorbed into the
188                    // ScannedBlock's is_truncated field.  Treat it as offset 0.
189                    _ => 0,
190                };
191                let abs_begin =
192                    offset_u32.saturating_add(u32::try_from(rel_begin).unwrap_or(u32::MAX));
193                InlineUUBlock {
194                    begin_offset: abs_begin,
195                    begin_length: None,
196                    mode: 0,
197                    filename: String::new(),
198                    data: Vec::new(),
199                    is_encoding_problem: true,
200                }
201            }
202        })
203        .collect()
204}
205
206#[cfg(test)]
207mod tests {
208    use super::*;
209    use crate::part::{ParsedPart, TransferEncoding};
210
211    /// Build a synthetic raw buffer: `prefix || body_bytes`, returning the
212    /// buffer and a `ParsedPart` whose `body_range` points at `body_bytes`.
213    fn make_part(prefix: &[u8], body_bytes: &[u8]) -> (Vec<u8>, ParsedPart) {
214        let mut raw = prefix.to_vec();
215        let body_offset = raw.len();
216        raw.extend_from_slice(body_bytes);
217
218        let part = ParsedPart {
219            part_id: "1".to_owned(),
220            content_type: "text/plain".to_owned(),
221            charset: Some("utf-8".to_owned()),
222            transfer_encoding: TransferEncoding::Identity,
223            disposition: None,
224            filename: None,
225            cid: None,
226            header_range: (0u32, body_offset as u32),
227            body_range: (body_offset as u32, body_bytes.len() as u32),
228            children: vec![],
229            is_encoding_problem: false,
230        };
231        (raw, part)
232    }
233
234    // -----------------------------------------------------------------------
235    // TV1: single block, "Hello"
236    // Oracle (Python 3.12 `uu` module):
237    //   uu.encode(io.BytesIO(b'Hello'), buf, 'hello.txt', 0o644)
238    //   → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
239    // -----------------------------------------------------------------------
240    #[test]
241    fn test_single_block_hello() {
242        // body hex: begin 644 hello.txt\n%2&5L;&\\ \n \nend\n
243        let body =
244            hex_bytes("626567696e203634342068656c6c6f2e7478740a253226354c3b265c200a200a656e640a");
245        let (raw, part) = make_part(b"", &body);
246
247        let blocks = scan_inline_uuencode(&raw, &part);
248        assert_eq!(blocks.len(), 1, "expected 1 block");
249
250        let b = &blocks[0];
251        assert_eq!(b.mode, 0o644);
252        assert_eq!(b.filename, "hello.txt");
253        // expected decoded: 48656c6c6f = "Hello"
254        assert_eq!(b.data, hex_bytes("48656c6c6f"));
255        assert!(!b.is_encoding_problem);
256        // begin_offset = 0 (no prefix), begin_length = Some(body.len()) = Some(36)
257        assert_eq!(b.begin_offset, 0);
258        assert_eq!(b.begin_length, Some(body.len() as u32));
259        // Verify by slicing raw
260        let len = b.begin_length.unwrap();
261        let sliced = &raw[b.begin_offset as usize..(b.begin_offset + len) as usize];
262        assert_eq!(sliced, body.as_slice());
263    }
264
265    // -----------------------------------------------------------------------
266    // TV2: two blocks with interleaved text
267    // Oracle (Python 3.12 `uu` module):
268    //   hello = uu.encode(b'Hello', 'hello.txt', 0o644)
269    //           → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'  (36 bytes)
270    //   fox   = uu.encode(b'The quick brown fox', 'fox.bin', 0o600)
271    //           → b"begin 600 fox.bin\n35&AE('%U:6-K(&)R;W=N(&9O>   \n \nend\n"  (54 bytes)
272    //   interleaved = hello + b'Some text in between\n' + fox
273    //   fox offset = 36 + 21 = 57
274    // -----------------------------------------------------------------------
275    #[test]
276    fn test_two_blocks() {
277        // full_body_hex from oracle output (hello 36 bytes + "Some text in between\n" 21 bytes + fox 54 bytes = 111 bytes)
278        let body = hex_bytes(
279            "626567696e203634342068656c6c6f2e7478740a253226354c3b265c200a200a656e64\
280             0a536f6d65207465787420696e206265747765656e0a626567696e2036303020666f78\
281             2e62696e0a3335264145282725553a362d4b282629523b573d4e2826394f3e2020200a\
282             200a656e640a",
283        );
284        let (raw, part) = make_part(b"", &body);
285
286        let blocks = scan_inline_uuencode(&raw, &part);
287        assert_eq!(blocks.len(), 2, "expected 2 blocks");
288
289        let b0 = &blocks[0];
290        assert_eq!(b0.mode, 0o644);
291        assert_eq!(b0.filename, "hello.txt");
292        assert_eq!(b0.data, hex_bytes("48656c6c6f")); // "Hello"
293        assert!(!b0.is_encoding_problem);
294        assert_eq!(b0.begin_offset, 0);
295        assert_eq!(b0.begin_length, Some(36)); // 36-byte block with terminator
296
297        let b1 = &blocks[1];
298        assert_eq!(b1.mode, 0o600);
299        assert_eq!(b1.filename, "fox.bin");
300        assert_eq!(
301            b1.data,
302            hex_bytes("54686520717569636b2062726f776e20666f78") // "The quick brown fox"
303        );
304        assert!(!b1.is_encoding_problem);
305        // block2 starts at offset 57 (36 + len("Some text in between\n") = 36+21=57)
306        assert_eq!(b1.begin_offset, 57);
307        assert_eq!(b1.begin_length, Some(54)); // 54-byte fox block with terminator
308
309        // Verify slices
310        let len0 = b0.begin_length.unwrap();
311        let len1 = b1.begin_length.unwrap();
312        let s0 = &raw[b0.begin_offset as usize..(b0.begin_offset + len0) as usize];
313        let s1 = &raw[b1.begin_offset as usize..(b1.begin_offset + len1) as usize];
314        // s0 should start with "begin 644 hello.txt\n"
315        assert!(s0.starts_with(b"begin 644 hello.txt\n"));
316        assert!(s0.ends_with(b"end\n"));
317        // s1 should start with "begin 600 fox.bin\n"
318        assert!(s1.starts_with(b"begin 600 fox.bin\n"));
319        assert!(s1.ends_with(b"end\n"));
320    }
321
322    // -----------------------------------------------------------------------
323    // TV2b: two blocks, absolute offsets with non-zero body_range
324    // -----------------------------------------------------------------------
325    #[test]
326    fn test_two_blocks_with_prefix_offset() {
327        let body = hex_bytes(
328            "626567696e203634342068656c6c6f2e7478740a253226354c3b265c200a200a656e64\
329             0a536f6d65207465787420696e206265747765656e0a626567696e2036303020666f78\
330             2e62696e0a3335264145282725553a362d4b282629523b573d4e2826394f3e2020200a\
331             200a656e640a",
332        );
333        let prefix = b"Content-Type: text/plain\r\n\r\n"; // 28 bytes
334        let (raw, part) = make_part(prefix, &body);
335
336        let blocks = scan_inline_uuencode(&raw, &part);
337        assert_eq!(blocks.len(), 2);
338
339        // Absolute offsets = prefix_len + relative_offset
340        assert_eq!(blocks[0].begin_offset, 28);
341        assert_eq!(blocks[1].begin_offset, 28 + 57); // fox starts at 57 in body
342
343        // Verify by slicing raw with absolute offsets
344        for b in &blocks {
345            let len = b.begin_length.unwrap();
346            let sliced = &raw[b.begin_offset as usize..(b.begin_offset + len) as usize];
347            assert!(sliced.starts_with(b"begin "));
348            assert!(sliced.ends_with(b"end\n"));
349        }
350    }
351
352    // -----------------------------------------------------------------------
353    // TV3: missing 'end' line → is_encoding_problem = true
354    // -----------------------------------------------------------------------
355    #[test]
356    fn test_missing_end_line() {
357        // body_hex: "begin 644 test.txt\n" + UU line for Hello, no "end\n"
358        let body = hex_bytes("626567696e2036343420746573742e7478740a253226354c3b265c200a");
359        let (raw, part) = make_part(b"", &body);
360
361        let blocks = scan_inline_uuencode(&raw, &part);
362        assert_eq!(blocks.len(), 1, "block still found even without end");
363        assert!(
364            blocks[0].is_encoding_problem,
365            "missing end must set is_encoding_problem"
366        );
367    }
368
369    // -----------------------------------------------------------------------
370    // TV4: 45 bytes decoded from one full UU line (all bytes 0x00..0x2c)
371    // Oracle (Python 3.12 `uu` module):
372    //   uu.encode(io.BytesIO(bytes(range(45))), buf, 'allbytes.bin', 0o644)
373    //   → b'begin 644 allbytes.bin\nM  $" P0%!@...Ll\n \nend\n'
374    // -----------------------------------------------------------------------
375    #[test]
376    fn test_full_line_45_bytes() {
377        // body_hex from oracle output (includes ' \n' terminator before end)
378        let body = hex_bytes(
379            "626567696e2036343420616c6c62797465732e62696e0a4d202024222050302521\
380             403c282230482b2320542e2351203124412c34253138372621443a2651503d27\
381             415c402832284329223446295240492a424c4c0a200a656e640a",
382        );
383        let (raw, part) = make_part(b"", &body);
384
385        let blocks = scan_inline_uuencode(&raw, &part);
386        assert_eq!(blocks.len(), 1);
387        assert_eq!(blocks[0].mode, 0o644);
388        assert_eq!(blocks[0].filename, "allbytes.bin");
389        assert_eq!(
390            blocks[0].data,
391            hex_bytes("000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c")
392        );
393        assert!(!blocks[0].is_encoding_problem);
394    }
395
396    // -----------------------------------------------------------------------
397    // TV5: backtick-terminated empty block (empty data)
398    // -----------------------------------------------------------------------
399    #[test]
400    fn test_backtick_empty_block() {
401        // body_hex: "begin 755 empty.bin\n`\nend\n"
402        let body = hex_bytes("626567696e2037353520656d7074792e62696e0a600a656e640a");
403        let (raw, part) = make_part(b"", &body);
404
405        let blocks = scan_inline_uuencode(&raw, &part);
406        assert_eq!(blocks.len(), 1);
407        assert_eq!(blocks[0].mode, 0o755);
408        assert_eq!(blocks[0].filename, "empty.bin");
409        assert!(blocks[0].data.is_empty(), "expected empty data");
410        assert!(!blocks[0].is_encoding_problem);
411    }
412
413    // -----------------------------------------------------------------------
414    // TV6: multi-line block
415    // Oracle (Python 3.12 `uu` module):
416    //   data = b'Hello, World! This is a test of multi-line UU encoding. Adding more bytes.'
417    //   uu.encode(io.BytesIO(data), buf, 'multiline.txt', 0o644)
418    //   → b'begin 644 multiline.txt\nM2&5L;&\\...\n=...\n \nend\n'
419    // -----------------------------------------------------------------------
420    #[test]
421    fn test_multiline_block() {
422        // Oracle hex (Python 3.12, includes ' \n' terminator before end)
423        let body = hex_bytes(
424            "626567696e20363434206d756c74696c696e652e7478740a4d3226354c3b265c4c\
425             28253d4f3c465144283221343a26455328264553282624403d2635533d22214f39\
426             42214d3d3651543a32554c3a365945282535350a3d2826354e38565d443a365947\
427             2b422121392631493b463c403b365d52393221423e3731453c5258200a200a656e\
428             640a",
429        );
430        let (raw, part) = make_part(b"", &body);
431
432        let blocks = scan_inline_uuencode(&raw, &part);
433        assert_eq!(blocks.len(), 1);
434        assert_eq!(blocks[0].mode, 0o644);
435        assert_eq!(blocks[0].filename, "multiline.txt");
436        // Oracle decoded bytes: "Hello, World! This is a test of multi-line UU encoding. Adding more bytes."
437        assert_eq!(
438            blocks[0].data,
439            hex_bytes("48656c6c6f2c20576f726c6421205468697320697320612074657374206f66206d756c74692d6c696e6520555520656e636f64696e672e20416464696e67206d6f72652062797465732e")
440        );
441        assert!(!blocks[0].is_encoding_problem);
442    }
443
444    // -----------------------------------------------------------------------
445    // No UU blocks → empty Vec
446    // -----------------------------------------------------------------------
447    #[test]
448    fn test_no_uu_blocks() {
449        let body = b"This is just plain text.\nNo UU blocks here.\n";
450        let (raw, part) = make_part(b"", body);
451        let blocks = scan_inline_uuencode(&raw, &part);
452        assert!(blocks.is_empty());
453    }
454
455    // -----------------------------------------------------------------------
456    // Out-of-bounds body_range → empty Vec
457    // -----------------------------------------------------------------------
458    #[test]
459    fn test_out_of_bounds_body_range() {
460        let raw = b"short";
461        let part = ParsedPart {
462            part_id: "1".to_owned(),
463            content_type: "text/plain".to_owned(),
464            charset: None,
465            transfer_encoding: TransferEncoding::Identity,
466            disposition: None,
467            filename: None,
468            cid: None,
469            header_range: (0, 0),
470            body_range: (3, 100), // end = 103, beyond raw.len() = 5
471            children: vec![],
472            is_encoding_problem: false,
473        };
474        let blocks = scan_inline_uuencode(raw, &part);
475        assert!(
476            blocks.is_empty(),
477            "out-of-bounds body_range must return empty Vec"
478        );
479    }
480
481    // -----------------------------------------------------------------------
482    // Overflow-safe body_range (offset + length wraps u32)
483    // -----------------------------------------------------------------------
484    #[test]
485    fn test_overflow_safe_body_range() {
486        let raw = b"data";
487        let part = ParsedPart {
488            part_id: "1".to_owned(),
489            content_type: "text/plain".to_owned(),
490            charset: None,
491            transfer_encoding: TransferEncoding::Identity,
492            disposition: None,
493            filename: None,
494            cid: None,
495            header_range: (0, 0),
496            body_range: (u32::MAX, 1), // wraps on usize add
497            children: vec![],
498            is_encoding_problem: false,
499        };
500        let blocks = scan_inline_uuencode(raw, &part);
501        assert!(
502            blocks.is_empty(),
503            "overflowing body_range must return empty Vec"
504        );
505    }
506
507    // -----------------------------------------------------------------------
508    // begin-base64 block is reported with is_encoding_problem=true and
509    // begin_offset set to the actual position of the begin-base64 line.
510    // -----------------------------------------------------------------------
511    #[test]
512    fn test_begin_base64_is_encoding_problem() {
513        // A begin-base64 block followed by a normal UU block.
514        // The begin-base64 generates an Err item; the UU block is decoded normally.
515        // Oracle: uu.encode(b'Hello', ...) → b'begin 644 hello.txt\n%2&5L;&\\ \n \nend\n'
516        let b64_block = b"begin-base64 644 file.txt\naGVsbG8=\n====\n";
517        let uu_block = b"begin 644 hello.txt\n%2&5L;&\\ \n \nend\n";
518        let mut body = Vec::new();
519        body.extend_from_slice(b64_block);
520        body.extend_from_slice(uu_block);
521        let (raw, part) = make_part(b"", &body);
522
523        let blocks = scan_inline_uuencode(&raw, &part);
524        // Two items: one Err (begin-base64) → is_encoding_problem, one Ok (UU block).
525        assert_eq!(blocks.len(), 2, "expected 2 items");
526        assert!(
527            blocks[0].is_encoding_problem,
528            "begin-base64 block must have is_encoding_problem=true"
529        );
530        // begin_offset must be 0 (begin-base64 is at start of body, no prefix)
531        assert_eq!(
532            blocks[0].begin_offset, 0,
533            "begin-base64 at body start must have begin_offset=0"
534        );
535        assert!(
536            !blocks[1].is_encoding_problem,
537            "valid UU block must not have is_encoding_problem"
538        );
539        assert_eq!(blocks[1].data, b"Hello");
540    }
541
542    // -----------------------------------------------------------------------
543    // TV-b64-solo: body contains only a begin-base64 block, no UU block follows.
544    //
545    // scan_inline_uuencode must return exactly one item with is_encoding_problem=true.
546    // This tests the case from test_begin_base64_is_encoding_problem stripped of
547    // the trailing valid UU block, to confirm the scanner does not drop the error
548    // item or return an empty Vec when nothing follows the begin-base64 block.
549    // -----------------------------------------------------------------------
550    #[test]
551    fn test_begin_base64_only_block() {
552        let body = b"begin-base64 644 file.gif\nSGVsbG8=\n====\n";
553        let (raw, part) = make_part(b"", body);
554
555        let blocks = scan_inline_uuencode(&raw, &part);
556        assert_eq!(
557            blocks.len(),
558            1,
559            "expected exactly 1 item for a solo begin-base64 block"
560        );
561        assert!(
562            blocks[0].is_encoding_problem,
563            "begin-base64 block must have is_encoding_problem=true"
564        );
565    }
566
567    // -----------------------------------------------------------------------
568    // begin-base64 with prefix: begin_offset reflects actual position
569    // -----------------------------------------------------------------------
570    #[test]
571    fn test_begin_base64_offset_with_prefix() {
572        // Prose before the begin-base64 block: begin_offset must not be 0.
573        let prefix = b"Some prose before.\n"; // 19 bytes
574        let b64_block = b"begin-base64 644 file.txt\naGVsbG8=\n====\n";
575        let mut body = Vec::new();
576        body.extend_from_slice(prefix);
577        body.extend_from_slice(b64_block);
578        let (raw, part) = make_part(b"", &body);
579
580        let blocks = scan_inline_uuencode(&raw, &part);
581        assert_eq!(blocks.len(), 1);
582        assert!(blocks[0].is_encoding_problem);
583        assert_eq!(
584            blocks[0].begin_offset,
585            prefix.len() as u32,
586            "begin_offset must equal the prefix length"
587        );
588    }
589
590    // -----------------------------------------------------------------------
591    // Helper: decode a hex string to bytes.
592    // -----------------------------------------------------------------------
593    fn hex_bytes(s: &str) -> Vec<u8> {
594        // Strip any whitespace (allows multi-line hex literals in tests).
595        let s: String = s.chars().filter(|c| !c.is_whitespace()).collect();
596        (0..s.len())
597            .step_by(2)
598            .map(|i| u8::from_str_radix(&s[i..i + 2], 16).unwrap())
599            .collect()
600    }
601}
mime_tree/uuencode.rs

mime_tree/
uuencode.rs