mime-tree 0.4.0

RFC 5322/MIME parser producing a byte-range-indexed part tree
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
//! Inline yEnc scanner for MIME body parts.
//!
//! # What is inline yEnc?
//!
//! yEnc binary posts on Usenet rarely carry a `Content-Transfer-Encoding`
//! header. Instead, the article body simply contains `=ybegin`/`=yend` framing
//! directly in the message body — often with no MIME structure at all. The
//! outer message is treated as `text/plain` (by default when no `Content-Type`
//! is present), and the encoded binary is embedded in it.
//!
//! # This module vs. `parse()` / `decode_body_value()`
//!
//! [`parse()`][crate::parse] and [`decode_body_value()`][crate::decode_body_value]
//! do not decode yEnc content — there is no standard `Content-Transfer-Encoding`
//! value for yEnc. Those functions will return the raw body text including the
//! `=ybegin` lines verbatim.
//!
//! [`scan_inline_yencode()`] is the opt-in scanner for this case. It operates
//! on the raw bytes of a part's body and locates every `=ybegin`…`=yend` block,
//! decoding each via the [`yencoding`] crate.
//!
//! # When to call this
//!
//! A reasonable heuristic: call `scan_inline_yencode()` on any `text/plain`
//! leaf part whose body bytes contain the ASCII sequence `b"=ybegin "`. This
//! avoids scanning every part while still catching all practical cases.
//!
//! ```rust
//! use mime_tree::{parse, scan_inline_yencode};
//!
//! // A message with no MIME structure — just a yEnc block in the body.
//! // Oracle: bytes [0,1,2] encode as ['*','+',',']; CRC32 = 0x0854897f
//! let raw: &[u8] = b"From: poster@example.com\r\n\
//!                    Subject: [1/1] hi.bin\r\n\
//!                    \r\n\
//!                    Some prose before the attachment.\r\n\
//!                    =ybegin line=128 size=3 name=hi.bin\r\n\
//!                    *+,\r\n\
//!                    =yend size=3 crc32=0854897f\r\n\
//!                    Some prose after.\r\n";
//!
//! let msg = parse(raw).unwrap();
//! let part = msg.part_index.find_by_id("1").unwrap();
//!
//! let blocks = scan_inline_yencode(raw, part);
//! assert_eq!(blocks.len(), 1);
//! assert_eq!(blocks[0].filename, "hi.bin");
//! assert_eq!(blocks[0].data, &[0u8, 1, 2]);
//! assert!(blocks[0].crc32_verified);
//! assert!(!blocks[0].is_encoding_problem);
//! ```

use crate::part::ParsedPart;

/// A single yEnc-encoded block found inside a part body.
///
/// All byte offsets are **absolute** — they are in the same coordinate space
/// as `ParsedPart::body_range` and the `raw` buffer passed to
/// [`scan_inline_yencode()`].
///
/// [`scan_inline_yencode()`] emits one value per `=ybegin` line it finds, even
/// when decoding fails. In that case
/// [`is_encoding_problem`](InlineYEncBlock::is_encoding_problem) is `true` and
/// the header-derived fields hold placeholder values, as described on each
/// field below.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct InlineYEncBlock {
    /// Byte offset of the `=ybegin` line within `raw`.
    ///
    /// For a successfully decoded block, slicing
    /// `raw[begin_offset .. begin_offset + begin_length]` yields the complete
    /// yEnc article from the `=ybegin` line through the `=yend` line
    /// (inclusive of its line ending).
    pub begin_offset: u32,

    /// Byte length of the entire block: from the start of `=ybegin` through
    /// the end of `=yend` (inclusive of its newline, if it has one).
    ///
    /// **When [`is_encoding_problem`](InlineYEncBlock::is_encoding_problem) is
    /// `true`**, this field holds the length of the `=ybegin` line only (up to
    /// and including its newline, or up to the end of the body if the line is
    /// unterminated), not the full block through `=yend`.  The scanner does
    /// not search for `=yend` after a failed decode.  Do not rely on
    /// `begin_offset + begin_length` spanning a complete block when
    /// `is_encoding_problem` is set.
    pub begin_length: u32,

    /// Filename from the `name=` field of `=ybegin`.
    ///
    /// Not sanitised against path traversal. Callers writing this to disk must
    /// validate against `..` and absolute paths.
    ///
    /// Empty when the block could not be decoded (see
    /// [`is_encoding_problem`](InlineYEncBlock::is_encoding_problem)).
    pub filename: String,

    /// Total declared file size in bytes, from `=ybegin size=`. For multi-part
    /// articles this is the size of the complete file, not just this part.
    ///
    /// When the block could not be decoded (see
    /// [`is_encoding_problem`](InlineYEncBlock::is_encoding_problem)), this
    /// field is `0` and does not reflect a declared size.
    pub file_size: u64,

    /// 1-based part number from `=ybegin part=`. `None` for single-part
    /// articles, and `None` when the block could not be decoded.
    pub part: Option<u32>,

    /// Total number of parts in the series from `=ybegin total=`.
    /// `None` for single-part articles, and `None` when the block could not
    /// be decoded.
    pub total_parts: Option<u32>,

    /// 1-based byte offset of the first byte of this part within the full file,
    /// from `=ypart begin=`. `None` for single-part articles, and `None` when
    /// the block could not be decoded.
    pub part_begin: Option<u64>,

    /// 1-based byte offset of the last byte of this part within the full file,
    /// from `=ypart end=`. `None` for single-part articles, and `None` when
    /// the block could not be decoded.
    pub part_end: Option<u64>,

    /// Decoded binary payload. Empty when [`yencoding::decode`] returned an
    /// error for this block.
    pub data: Vec<u8>,

    /// `true` if the CRC32 in `=yend` was present and matched the decoded
    /// bytes. `false` if no CRC field was present in the article (some older
    /// encoders omit it), and always `false` when [`yencoding::decode`]
    /// returned an error for this block.
    pub crc32_verified: bool,

    /// `true` if [`yencoding::decode`] returned an error for this block
    /// (for example an invalid header field, a missing `=yend`, or a CRC
    /// mismatch).
    ///
    /// In that case `data` is empty and the header-derived fields hold
    /// placeholder values.  The specific yEnc error variant is not exposed —
    /// callers only see this boolean flag; the underlying
    /// [`yencoding::YencError`] is discarded internally.
    ///
    /// The flag is also set in one internal-inconsistency case: decoding
    /// succeeded but the scanner could not re-locate the `=yend` line.  Then
    /// `data` and the header fields are valid, but `begin_length` covers only
    /// the `=ybegin` line.
    pub is_encoding_problem: bool,
}

/// Locate and decode every inline yEnc block in the body of a MIME part.
///
/// The body is the sub-slice of `raw` described by `part.body_range`
/// (`(offset, length)`). Each `=ybegin` line found at a line start is handed
/// to [`yencoding::decode`], and one [`InlineYEncBlock`] is produced per
/// `=ybegin` line — including blocks that fail to decode, which are flagged
/// via `is_encoding_problem`.
///
/// # Parameters
///
/// * `raw`  — the full raw message bytes (same buffer passed to [`parse()`][crate::parse]).
/// * `part` — a [`ParsedPart`][crate::ParsedPart] from the parsed tree.
///   Only `part.body_range` is read.
///
/// # Return value
///
/// Blocks in order of appearance. The result is empty when:
/// - the body contains no `=ybegin` line, or
/// - `part.body_range` does not fit inside `raw`.
///
/// # Multiple blocks
///
/// A body may hold several yEnc articles back to back (unusual in practice);
/// every one of them is returned.
///
/// # Notes
///
/// * Offsets in the returned blocks are absolute, i.e. relative to the start
///   of `raw`, the same coordinate space as `part.body_range`.
/// * Out-of-range body ranges are rejected rather than sliced, so they cannot
///   cause a panic.
#[must_use = "the scanned yEnc blocks must be used"]
pub fn scan_inline_yencode(raw: &[u8], part: &ParsedPart) -> Vec<InlineYEncBlock> {
    let (body_start, body_len) = part.body_range;
    let start = body_start as usize;

    // Checked arithmetic plus checked slicing: a body range that overflows or
    // runs past the buffer yields an empty result instead of a panic.
    let body = match start
        .checked_add(body_len as usize)
        .and_then(|stop| raw.get(start..stop))
    {
        Some(bytes) => bytes,
        None => return Vec::new(),
    };

    let mut found = Vec::new();
    let mut cursor = 0usize;

    // The explicit bound check keeps `find_ybegin` from being called with a
    // cursor sitting at the end of an unterminated final line.
    while cursor < body.len() {
        let block_rel = match find_ybegin(body, cursor) {
            Some(rel) => rel,
            None => break, // nothing further to decode
        };

        // The decoder is given everything from the `=ybegin` line onward and
        // reports how many of those bytes belong to this block.
        let (decoded, consumed, failed) = decode_one_block(&body[block_rel..]);

        found.push(InlineYEncBlock {
            // Translate the body-relative position into `raw` coordinates,
            // saturating rather than wrapping if it does not fit in a u32.
            begin_offset: body_start.saturating_add(u32::try_from(block_rel).unwrap_or(u32::MAX)),
            begin_length: u32::try_from(consumed).unwrap_or(u32::MAX),
            filename: decoded.metadata.filename,
            file_size: decoded.metadata.size,
            part: decoded.part,
            total_parts: decoded.metadata.total_parts,
            part_begin: decoded.part_begin,
            part_end: decoded.part_end,
            data: decoded.data,
            crc32_verified: decoded.crc32_verified,
            is_encoding_problem: failed,
        });

        // Skip what was consumed (the whole block, or just the `=ybegin` line
        // after a failure). The `.max(1)` floor guarantees the cursor always
        // moves forward, so the loop cannot spin in place.
        cursor = block_rel + consumed.max(1);
    }

    found
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Locate the next line in `body` that begins with `=ybegin ` (keyword plus
/// trailing space), searching from `start`.
///
/// Returns the offset of that line relative to the start of `body`, or `None`
/// when no remaining line begins with the marker.
///
/// Only the first bytes of each line are compared, so marker bytes appearing
/// in the middle of a line (for example inside encoded data or prose) are
/// never reported.
///
/// # Precondition
///
/// `start` must be a line-boundary offset: either `0` or the position right
/// after a `\n` in `body`. This is checked with a `debug_assert!` only. In
/// release builds a mid-line `start` does not panic, but the first comparison
/// then happens mid-line, which can report marker bytes that are not at a
/// true line start.
fn find_ybegin(body: &[u8], start: usize) -> Option<usize> {
    const MARKER: &[u8] = b"=ybegin ";

    debug_assert!(
        start == 0 || body.get(start - 1) == Some(&b'\n'),
        "find_ybegin: start must be a line-boundary offset"
    );

    let mut line_start = start;
    loop {
        // Reaching (or starting at/after) the end of the body means no match.
        let line = body.get(line_start..).filter(|rest| !rest.is_empty())?;
        if line.starts_with(MARKER) {
            return Some(line_start);
        }
        // Jump to the byte after the next `\n`; an unterminated final line
        // ends the search.
        line_start += line.iter().position(|&byte| byte == b'\n')? + 1;
    }
}

/// Decode the yEnc block that begins at the start of `slice`.
///
/// Returns `(decoded, bytes_consumed, is_error)`:
/// - `decoded` is the result of [`yencoding::decode`], or an empty
///   placeholder part when decoding failed;
/// - `bytes_consumed` is the span of the block within `slice` — through the
///   end of the `=yend` line on success, or just the `=ybegin` line otherwise;
/// - `is_error` is `true` when `yencoding::decode` returned `Err`, and also in
///   the should-not-happen case where decoding succeeded but the `=yend` line
///   could not be located again.
fn decode_one_block(slice: &[u8]) -> (yencoding::DecodedPart, usize, bool) {
    let decoded = match yencoding::decode(slice) {
        Ok(decoded) => decoded,
        // Failure: hand back a placeholder and step over the `=ybegin` line
        // only, so the caller still makes forward progress.
        Err(err) => return (make_error_sentinel(err), find_line_end(slice, 0), true),
    };

    // Decoding succeeded, so a `=yend` line is expected in `slice`; measure
    // how far the block extends so the caller can skip past it.
    if let Some(span) = find_yend_end(slice) {
        return (decoded, span, false);
    }

    // Reaching this point means this module's `=yend` search disagrees with
    // the decoder — a logic error here, not bad input. The payload is still
    // valid, but only the `=ybegin` line can be reported as consumed, so the
    // block is flagged to warn callers that its length metadata does not
    // cover the whole article.
    debug_assert!(
        false,
        "find_yend_end returned None after successful decode — logic error"
    );
    (decoded, find_line_end(slice, 0), true)
}

/// Return the offset just past the first `=yend` line in `slice`, or `None`
/// when no such line exists (for example a truncated article).
///
/// A line counts as a `=yend` line only if it starts with `=yend` and the
/// keyword is immediately followed by a space, `\r`, `\n`, or the end of the
/// slice. The delimiter requirement keeps a line such as `=yendfoo` from
/// being mistaken for the trailer.
///
/// The returned offset includes the line's terminating `\n`; if the line is
/// unterminated, it is `slice.len()`.
fn find_yend_end(slice: &[u8]) -> Option<usize> {
    const KEYWORD: &[u8] = b"=yend";

    let mut line_start = 0usize;
    while let Some(line) = slice.get(line_start..).filter(|rest| !rest.is_empty()) {
        // The byte right after the keyword must be a delimiter (or absent).
        let delimited = matches!(
            line.get(KEYWORD.len()).copied(),
            None | Some(b' ') | Some(b'\r') | Some(b'\n')
        );
        if delimited && line.starts_with(KEYWORD) {
            return Some(find_line_end(slice, line_start));
        }
        // Move on to the following line; stop if this one is unterminated.
        line_start += line.iter().position(|&byte| byte == b'\n')? + 1;
    }
    None
}

/// Return the byte offset just past the end of the line starting at `pos`
/// within `slice`.
///
/// The result includes the terminating `\n`. If there is no `\n` at or after
/// `pos`, returns `slice.len()`.
///
/// A `pos` beyond the end of `slice` is treated like a position with no
/// following newline and also yields `slice.len()`; checked slicing is used
/// so this helper never panics, in keeping with the module's "no panic on any
/// input" guarantee.
fn find_line_end(slice: &[u8], pos: usize) -> usize {
    slice
        .get(pos..)
        .and_then(|tail| tail.iter().position(|&b| b == b'\n'))
        .map_or(slice.len(), |rel| pos + rel + 1)
}

/// Produce the placeholder `DecodedPart` reported for a block that failed to
/// decode.
///
/// The placeholder carries no payload, an empty filename, a size of `0`, no
/// part information, and `crc32_verified == false`. The error value itself is
/// accepted only to be dropped; none of its details are carried over.
fn make_error_sentinel(_err: yencoding::YencError) -> yencoding::DecodedPart {
    let metadata = yencoding::YencMetadata {
        filename: String::new(),
        size: 0,
        line_length: 128,
        total_parts: None,
    };

    yencoding::DecodedPart {
        data: Vec::new(),
        metadata,
        part: None,
        part_begin: None,
        part_end: None,
        crc32_verified: false,
        whole_file_crc32: None,
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::part::{ParsedPart, TransferEncoding};

    /// Build a raw buffer of `prefix` followed by `body_bytes`, together with
    /// a `text/plain` `ParsedPart` whose `body_range` covers exactly
    /// `body_bytes` (so the body starts at absolute offset `prefix.len()`).
    fn make_part(prefix: &[u8], body_bytes: &[u8]) -> (Vec<u8>, ParsedPart) {
        let mut raw = prefix.to_vec();
        let body_offset = raw.len();
        raw.extend_from_slice(body_bytes);
        let part = ParsedPart {
            part_id: "1".to_owned(),
            content_type: "text/plain".to_owned(),
            charset: Some("utf-8".to_owned()),
            transfer_encoding: TransferEncoding::Identity,
            disposition: None,
            filename: None,
            cid: None,
            header_range: (0u32, body_offset as u32),
            body_range: (body_offset as u32, body_bytes.len() as u32),
            children: vec![],
            is_encoding_problem: false,
        };
        (raw, part)
    }

    // Oracle: bytes [0,1,2] → ['*','+',','] (add 42, no escapes).
    // CRC32 of [0,1,2]: python3 -c "import binascii; print(hex(binascii.crc32(bytes([0,1,2]))&0xffffffff))"
    // → 0x0854897f
    const BLOCK_012: &[u8] =
        b"=ybegin line=128 size=3 name=hi.bin\r\n*+,\r\n=yend size=3 crc32=0854897f\r\n";

    // Oracle: bytes [3,4,5] → ['-','.','/'] (add 42).
    // CRC32: python3 -c "import binascii; print(hex(binascii.crc32(bytes([3,4,5]))&0xffffffff))"
    // → 0xe90156c0
    const BLOCK_345: &[u8] =
        b"=ybegin line=128 size=3 name=other.bin\r\n-./\r\n=yend size=3 crc32=e90156c0\r\n";

    /// A body consisting of exactly one block: decoded payload, metadata and
    /// offsets must all line up with the block itself.
    #[test]
    fn single_block_no_preamble() {
        let (raw, part) = make_part(b"", BLOCK_012);
        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].data, &[0u8, 1, 2]);
        assert_eq!(blocks[0].filename, "hi.bin");
        assert_eq!(blocks[0].file_size, 3);
        assert!(blocks[0].crc32_verified);
        assert!(!blocks[0].is_encoding_problem);
        assert_eq!(blocks[0].begin_offset, 0);
        assert_eq!(blocks[0].begin_length, BLOCK_012.len() as u32);
    }

    /// Prose before the block must shift `begin_offset` but not affect
    /// `begin_length`.
    #[test]
    fn single_block_with_preamble() {
        let preamble = b"Some prose.\r\nMore prose.\r\n";
        let (raw, part) = make_part(b"", &[preamble, BLOCK_012].concat());
        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].data, &[0u8, 1, 2]);
        assert_eq!(blocks[0].begin_offset, preamble.len() as u32);
        assert_eq!(blocks[0].begin_length, BLOCK_012.len() as u32);
        // Verify slice invariant: raw[begin_offset..begin_offset+begin_length] == BLOCK_012
        let start = blocks[0].begin_offset as usize;
        let end = start + blocks[0].begin_length as usize;
        assert_eq!(&raw[start..end], BLOCK_012);
    }

    /// Two blocks separated by prose are both returned, in order, with
    /// non-overlapping spans.
    #[test]
    fn two_sequential_blocks() {
        let separator = b"Some text between blocks.\r\n";
        let body = [BLOCK_012, separator, BLOCK_345].concat();
        let (raw, part) = make_part(b"", &body);

        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 2, "expected 2 blocks");

        assert_eq!(blocks[0].data, &[0u8, 1, 2]);
        assert_eq!(blocks[0].filename, "hi.bin");
        assert_eq!(blocks[0].begin_offset, 0);

        assert_eq!(blocks[1].data, &[3u8, 4, 5]);
        assert_eq!(blocks[1].filename, "other.bin");
        assert_eq!(
            blocks[1].begin_offset,
            (BLOCK_012.len() + separator.len()) as u32
        );

        // Non-overlapping
        assert!(blocks[0].begin_offset + blocks[0].begin_length <= blocks[1].begin_offset);
    }

    /// Offsets are absolute within `raw`, not relative to the body: a non-empty
    /// prefix before the body must be included in `begin_offset`.
    #[test]
    fn block_with_absolute_prefix_offset() {
        let prefix = b"MIME headers here\r\n\r\n";
        let (raw, part) = make_part(prefix, BLOCK_012);
        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        // Absolute offset = prefix.len() (body starts there, block at body start)
        assert_eq!(blocks[0].begin_offset, prefix.len() as u32);
        // Verify slice invariant
        let start = blocks[0].begin_offset as usize;
        let end = start + blocks[0].begin_length as usize;
        assert_eq!(&raw[start..end], BLOCK_012);
    }

    /// A body with no `=ybegin` line yields no blocks.
    #[test]
    fn no_blocks_returns_empty() {
        let (raw, part) = make_part(b"", b"Just plain text.\r\nNo yEnc here.\r\n");
        assert!(scan_inline_yencode(&raw, &part).is_empty());
    }

    /// A zero-length body yields no blocks.
    #[test]
    fn empty_body_returns_empty() {
        let (raw, part) = make_part(b"", b"");
        assert!(scan_inline_yencode(&raw, &part).is_empty());
    }

    /// A `body_range` that extends past the end of `raw` is rejected with an
    /// empty result instead of a slicing panic.
    #[test]
    fn out_of_bounds_body_range_returns_empty() {
        let raw = b"short";
        let part = ParsedPart {
            part_id: "1".to_owned(),
            content_type: "text/plain".to_owned(),
            charset: None,
            transfer_encoding: TransferEncoding::Identity,
            disposition: None,
            filename: None,
            cid: None,
            header_range: (0, 0),
            body_range: (3, 100), // end = 103 > 5
            children: vec![],
            is_encoding_problem: false,
        };
        assert!(scan_inline_yencode(raw, &part).is_empty());
    }

    /// A `body_range` at the very top of the `u32` range must not panic.
    ///
    /// Note: the offset and length are widened to `usize` before being added,
    /// so `u32::MAX + 1` only overflows where `usize` is 32 bits wide. On
    /// 64-bit targets this exercises the same out-of-bounds rejection as the
    /// test above rather than the overflow branch.
    #[test]
    fn overflow_safe_body_range() {
        let raw = b"data";
        let part = ParsedPart {
            part_id: "1".to_owned(),
            content_type: "text/plain".to_owned(),
            charset: None,
            transfer_encoding: TransferEncoding::Identity,
            disposition: None,
            filename: None,
            cid: None,
            header_range: (0, 0),
            body_range: (u32::MAX, 1),
            children: vec![],
            is_encoding_problem: false,
        };
        assert!(scan_inline_yencode(raw, &part).is_empty());
    }

    /// A block whose payload is intact but whose declared CRC is wrong is
    /// still reported, flagged as a problem and with no payload.
    #[test]
    fn crc_mismatch_sets_is_encoding_problem() {
        // Correct encoding but wrong CRC in =yend.
        let bad = b"=ybegin line=128 size=3 name=f.bin\r\n*+,\r\n=yend size=3 crc32=00000000\r\n";
        let (raw, part) = make_part(b"", bad);
        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert!(
            blocks[0].is_encoding_problem,
            "CRC mismatch should set is_encoding_problem"
        );
        assert!(
            blocks[0].data.is_empty(),
            "data should be empty on CRC error"
        );
    }

    /// A block that is cut off before `=yend` is still reported, flagged as a
    /// problem.
    #[test]
    fn truncated_block_sets_is_encoding_problem() {
        // =yend line absent.
        let trunc = b"=ybegin line=128 size=3 name=f.bin\r\n*+,\r\n";
        let (raw, part) = make_part(b"", trunc);
        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert!(blocks[0].is_encoding_problem);
    }

    /// `=ybegin` appearing in the middle of a line is not a block start.
    #[test]
    fn ybegin_mid_line_not_matched() {
        // "not =ybegin" — keyword not at line start, must be ignored.
        let body = b"this is not =ybegin a real block\r\n=ybegin line=128 size=3 name=f.bin\r\n*+,\r\n=yend size=3 crc32=0854897f\r\n";
        let (raw, part) = make_part(b"", body);
        let blocks = scan_inline_yencode(&raw, &part);
        // Only the real block at the line boundary should be found.
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].data, &[0u8, 1, 2]);
    }

    /// Round-trip through the `yencoding` encoder: the multi-part fields
    /// (`part`, `total_parts`, `part_begin`, `part_end`) must be carried over
    /// into the reported block.
    #[test]
    fn multipart_article_fields_populated() {
        // Oracle: multi-part article with =ypart.
        // Encode bytes [0,1,2] as part 1 of 2, begin=1 end=3.
        use yencoding::{encode_part, EncodePartOptions, DEFAULT_LINE_LENGTH};
        let data = [0u8, 1, 2];
        // Oracle: python3 -c "import binascii; print(hex(binascii.crc32(bytes([0,1,2,3,4,5]))&0xffffffff))"
        // → 0x30ebcf4a
        let whole_crc: u32 = 0x30eb_cf4a;
        let opts = EncodePartOptions {
            filename: "split.bin",
            total_size: 6,
            total_parts: 2,
            part: 1,
            begin: 1,
            end: 3,
            whole_file_crc32: whole_crc,
            line_length: DEFAULT_LINE_LENGTH,
        };
        let encoded = encode_part(&data, &opts);
        let (raw, part) = make_part(b"", &encoded);

        let blocks = scan_inline_yencode(&raw, &part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].part, Some(1));
        assert_eq!(blocks[0].total_parts, Some(2));
        assert_eq!(blocks[0].part_begin, Some(1));
        assert_eq!(blocks[0].part_end, Some(3));
        assert_eq!(blocks[0].file_size, 6);
        assert!(blocks[0].crc32_verified);
        // Oracle: bytes [0,1,2] are the decoded payload of this part.
        assert_eq!(
            blocks[0].data,
            &[0u8, 1, 2],
            "decoded bytes must match oracle"
        );
        // Slice invariant: raw[begin_offset..begin_offset+begin_length] == encoded
        let start = blocks[0].begin_offset as usize;
        let end = start + blocks[0].begin_length as usize;
        assert_eq!(
            &raw[start..end],
            encoded.as_slice(),
            "slice invariant must hold for multi-part block"
        );
    }

    // Integration test: full parse() → scan_inline_yencode() pipeline
    #[test]
    fn full_parse_pipeline() {
        use crate::parse;

        // A bare message with no MIME headers — just a yEnc block in the body.
        let raw: Vec<u8> = [
            b"From: poster@example.com\r\n" as &[u8],
            b"Subject: [1/1] hi.bin\r\n",
            b"\r\n",
            b"Some prose.\r\n",
            BLOCK_012,
            b"More prose.\r\n",
        ]
        .concat();

        let msg = parse(&raw).expect("parse failed");
        // Should be a single text/plain part.
        let part = msg.part_index.find_by_id("1").unwrap();
        assert_eq!(part.content_type, "text/plain");

        let blocks = scan_inline_yencode(&raw, part);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].data, &[0u8, 1, 2]);
        assert_eq!(blocks[0].filename, "hi.bin");
        assert!(blocks[0].crc32_verified);
    }
}