Skip to main content

uuencoding_multi/
reassemble.rs

1use crate::{MultiUuError, PartCollection};
2
/// The outcome of joining the decoded parts of a multi-part UU-encoded file.
///
/// Produced by [`reassemble`]. `data` carries the decoded binary payload;
/// check `is_truncated` before treating that payload as a complete file.
///
/// The type is `Clone` and `Eq` so results can be cached, duplicated, and
/// compared in tests without hand-written helpers.
///
/// # Security note
///
/// `data` may contain a compressed archive. **This crate never decompresses
/// the output.** Enforce your own size and resource limits before
/// decompressing, to guard against decompression-bomb attacks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReassembledFile {
    /// Filename taken from the `begin` line of the first part that was
    /// present in the collection (the lowest `part_number >= 1`). When part 1
    /// is missing this is therefore the name declared by a later part.
    ///
    /// **Security — path traversal**: the value is copied verbatim from the
    /// email or Usenet message and is not sanitised. Real-world UU archives
    /// have been observed with filenames containing `../` sequences. Before
    /// using it as a filesystem path, sanitise it (e.g. reject names
    /// containing `/`, `\`, or `..` components, and resolve the final path
    /// against an allowed base directory).
    pub filename: String,
    /// Unix permission mode (e.g. `0o644`) from the same `begin` line that
    /// supplied `filename`. Later parts may declare a different mode; their
    /// values are ignored.
    pub mode: u32,
    /// Decoded binary payload.
    ///
    /// When [`is_truncated`][Self::is_truncated] is `false`, this is the
    /// whole decoded file: the decoded output of every part, concatenated in
    /// ascending `part_number` order.
    ///
    /// # When `is_truncated` is `true`
    ///
    /// `data` holds **only the decoded bytes of the parts that were
    /// present**, concatenated in ascending part-number order. The bytes of
    /// absent parts are simply not there (at the start, middle, or end), so
    /// the sequence is **not** a contiguous region of the original file and
    /// does **not** map onto any valid file offset range.
    ///
    /// **Do not write truncated data to disk as if it were a complete file.**
    /// It is intended for diagnostics only (e.g. logging, partial-content
    /// display). For a usable file, wait until
    /// [`is_complete()`][crate::PartCollection::is_complete] returns `true`
    /// before calling [`reassemble`].
    pub data: Vec<u8>,
    /// `true` when at least one part was absent from the collection, or when
    /// the UU body of any present part lacked its `end` line. The data is
    /// likely corrupt in either case.
    ///
    /// The two causes can be told apart with `missing_parts`:
    /// - `is_truncated && !missing_parts.is_empty()` — gap in the collection.
    /// - `is_truncated && missing_parts.is_empty()` — every part was present
    ///   but at least one part's UU body was itself cut short (no `end`).
    pub is_truncated: bool,
    /// Part numbers in `1..=total` that were absent from the collection, in
    /// ascending order. Empty when the collection was complete.
    pub missing_parts: Vec<u32>,
}
63
64/// Reassemble a multi-part UU-encoded file from its parts.
65///
66/// Iterates over all [`PartEntry`][crate::PartEntry] values with
67/// `part_number >= 1` in ascending order (the [`PartCollection`]'s
68/// `BTreeMap` guarantees this). Each part's `body_bytes` is independently
69/// decoded via `uuencoding::decode` and the decoded payloads are concatenated.
70/// `filename` and `mode` are taken from the first part only.
71///
72/// The TOC part (`part_number = 0`), if present, is silently ignored.
73///
74/// # Errors
75///
76/// - [`MultiUuError::EmptyCollection`] — the collection contains no parts
77///   with `part_number >= 1` (including the case of a TOC-only collection).
78/// - [`MultiUuError::DecodeError`] — `uuencoding::decode` returned an error
79///   for one of the parts. Reassembly stops at the first failing part.
80///
81/// # Partial results
82///
83/// When parts are missing the function still returns `Ok` rather than an
84/// error. The result has `is_truncated = true` and `missing_parts` listing
85/// the absent part numbers. `data` contains the decoded bytes of only the
86/// **present** parts concatenated in order.
87///
88/// **This is not a contiguous file region.** The bytes from the missing parts
89/// are absent, so the data does not correspond to a valid byte range within
90/// the original file. Do not write this to disk as a complete file. It is
91/// suitable for diagnostics only. Call
92/// [`PartCollection::is_complete()`][crate::PartCollection::is_complete]
93/// before reassembling if you require a usable result.
94///
95/// # Never panics
96///
97/// This function never panics. The `expect` on the internal `get()` call
98/// is unreachable by construction: `present_parts()` only yields numbers
99/// that are keys in the underlying map.
100///
101/// # Security
102///
103/// The decoded `data` may be a compressed archive. Any subsequent
104/// decompression is the caller's responsibility and must be independently
105/// guarded against decompression-bomb attacks. This crate does not
106/// decompress.
107///
108/// The `filename` field of the returned [`ReassembledFile`] comes from the
109/// email subject or UU `begin` line and is **not sanitised**. Sanitise it
110/// before using it as a filesystem path to prevent directory traversal
111/// attacks.
112///
113/// # Example
114///
115/// ```no_run
116/// use uuencoding_multi::{PartCollection, PartEntry, reassemble};
117///
118/// // In practice `body_bytes` comes from message parts fetched from NNTP
119/// // or a mailbox; `no_run` is used here because constructing valid UU
120/// // bodies inline is verbose.
121/// let mut coll = PartCollection::with_total(2);
122/// coll.add(PartEntry { part_number: 1, body_bytes: todo!(), subject: None }).unwrap();
123/// coll.add(PartEntry { part_number: 2, body_bytes: todo!(), subject: None }).unwrap();
124///
125/// let file = reassemble(&coll).unwrap();
126/// assert!(!file.is_truncated);
127/// // Apply size limits before decompressing file.data.
128/// println!("{}: {} bytes", file.filename, file.data.len());
129/// ```
130pub fn reassemble(collection: &PartCollection) -> Result<ReassembledFile, MultiUuError> {
131    // Collect present part numbers >= 1, in ascending order (BTreeMap guarantees this).
132    let present: Vec<u32> = collection.present_parts().filter(|&n| n >= 1).collect();
133
134    if present.is_empty() {
135        return Err(MultiUuError::EmptyCollection);
136    }
137
138    let missing_parts = collection.missing_parts();
139
140    // Decode each present part individually and concatenate.
141    let mut all_data: Vec<u8> = Vec::new();
142    let mut any_truncated = false;
143    let mut filename = String::new();
144    let mut mode = 0u32;
145    let mut first = true;
146
147    for part_num in &present {
148        let entry = collection
149            .get(*part_num)
150            .unwrap_or_else(|| unreachable!("present_parts listed a part that get() cannot find"));
151        let block = uuencoding::decode(&entry.body_bytes)?;
152
153        if first {
154            filename = block.metadata.filename;
155            mode = block.metadata.mode;
156            first = false;
157        }
158
159        if block.is_truncated {
160            any_truncated = true;
161        }
162
163        all_data.extend_from_slice(&block.data);
164    }
165
166    let is_truncated = any_truncated || !missing_parts.is_empty();
167
168    Ok(ReassembledFile {
169        filename,
170        mode,
171        data: all_data,
172        is_truncated,
173        missing_parts,
174    })
175}
176
177// ---------------------------------------------------------------------------
178// Tests
179// ---------------------------------------------------------------------------
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{PartCollection, PartEntry};

    // ------------------------------------------------------------------
    // Fixtures
    //
    // Oracle: Python 3.11 `uu` module, 2026-05-04
    //
    //   full = b'Hello, World! This is a multi-part test.'  (40 bytes)
    //   p1_data = full[:14]  = b'Hello, World! '
    //   p2_data = full[14:27] = b'This is a mul'
    //   p3_data = full[27:]  = b'ti-part test.'
    //
    //   uu.encode(p1_data, buf, 'file.bin', 0o644)
    //     => b'begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2  \n \nend\n'
    //   uu.encode(p2_data, buf, 'file.bin', 0o644)
    //     => b'begin 644 file.bin\n-5&AI<R!I<R!A(&UU;   \n \nend\n'
    //   uu.encode(p3_data, buf, 'file.bin', 0o644)
    //     => b'begin 644 file.bin\n-=&DM<&%R="!T97-T+@  \n \nend\n'
    // ------------------------------------------------------------------

    const PART1_BODY: &[u8] = b"begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2  \n \nend\n";
    const PART2_BODY: &[u8] = b"begin 644 file.bin\n-5&AI<R!I<R!A(&UU;   \n \nend\n";
    const PART3_BODY: &[u8] = b"begin 644 file.bin\n-=&DM<&%R=\"!T97-T+@  \n \nend\n";

    /// Build a subject-less `PartEntry` that owns a copy of `body`.
    fn make_entry(part_number: u32, body: &[u8]) -> PartEntry {
        let body_bytes = body.to_vec();
        PartEntry {
            part_number,
            body_bytes,
            subject: None,
        }
    }

    // ------------------------------------------------------------------
    // Complete collections
    // ------------------------------------------------------------------

    /// A one-part collection decodes to that part's payload and metadata.
    ///
    /// Oracle: PART1_BODY decodes to b'Hello, World! ' with name `file.bin`
    /// and mode 0o644.
    #[test]
    fn single_part_correct_data_and_metadata() {
        let mut collection = PartCollection::with_total(1);
        collection.add(make_entry(1, PART1_BODY)).unwrap();

        let file = reassemble(&collection).unwrap();

        assert_eq!(file.data, b"Hello, World! ");
        assert_eq!(file.filename, "file.bin");
        assert_eq!(file.mode, 0o644);
        assert!(!file.is_truncated);
        assert!(file.missing_parts.is_empty());
    }

    /// Oracle: the decoded bytes of parts 1, 2 and 3 joined together are the
    /// full 40-byte file.
    #[test]
    fn three_parts_full_reassembly() {
        let mut collection = PartCollection::with_total(3);
        collection.add(make_entry(1, PART1_BODY)).unwrap();
        collection.add(make_entry(2, PART2_BODY)).unwrap();
        collection.add(make_entry(3, PART3_BODY)).unwrap();

        let file = reassemble(&collection).unwrap();

        assert_eq!(file.data, b"Hello, World! This is a multi-part test.");
        assert_eq!(file.filename, "file.bin");
        assert_eq!(file.mode, 0o644);
        assert!(!file.is_truncated);
        assert!(file.missing_parts.is_empty());
    }

    /// Insertion order must not matter: output follows part numbers.
    #[test]
    fn three_parts_out_of_order_still_correct() {
        let mut collection = PartCollection::with_total(3);
        collection.add(make_entry(3, PART3_BODY)).unwrap();
        collection.add(make_entry(1, PART1_BODY)).unwrap();
        collection.add(make_entry(2, PART2_BODY)).unwrap();

        let file = reassemble(&collection).unwrap();

        assert_eq!(file.data, b"Hello, World! This is a multi-part test.");
        assert!(!file.is_truncated);
    }

    // ------------------------------------------------------------------
    // Gap in the collection
    // ------------------------------------------------------------------

    /// With part 2 of 3 absent the result is flagged truncated, reports
    /// `[2]` as missing, and carries part 1's bytes followed by part 3's.
    #[test]
    fn missing_middle_part_yields_truncated() {
        let mut collection = PartCollection::with_total(3);
        collection.add(make_entry(1, PART1_BODY)).unwrap();
        // Part 2 is intentionally never added.
        collection.add(make_entry(3, PART3_BODY)).unwrap();

        let file = reassemble(&collection).unwrap();

        assert!(file.is_truncated);
        assert_eq!(file.missing_parts, vec![2]);
        // Oracle: b'Hello, World! ' (part 1) ++ b'ti-part test.' (part 3).
        assert_eq!(file.data, b"Hello, World! ti-part test.");
    }

    // ------------------------------------------------------------------
    // Nothing to reassemble
    // ------------------------------------------------------------------

    /// A collection with no entries at all is rejected.
    #[test]
    fn empty_collection_returns_error() {
        let collection = PartCollection::new();

        let outcome = reassemble(&collection);

        assert!(matches!(outcome, Err(MultiUuError::EmptyCollection)));
    }

    /// A TOC entry (part_number = 0) is not a data part, so a collection
    /// holding only that entry is rejected the same way as an empty one.
    #[test]
    fn toc_only_is_empty_collection() {
        let mut collection = PartCollection::new();
        collection.add(make_entry(0, b"toc data")).unwrap();

        let outcome = reassemble(&collection);

        assert!(matches!(outcome, Err(MultiUuError::EmptyCollection)));
    }

    // ------------------------------------------------------------------
    // Body cut short although every part is present
    // ------------------------------------------------------------------

    /// No part is missing, but part 2 has lost its ` \nend\n` terminator, so
    /// the decoder flags it as truncated. The combined result must be
    /// truncated too while `missing_parts` stays empty.
    #[test]
    fn truncated_uu_body_with_all_parts_present() {
        // PART2_BODY is <begin line> <data line> " \n" "end\n"; drop the
        // final two lines so only the header and data line remain.
        let part2_without_end = PART2_BODY
            .strip_suffix(b" \nend\n")
            .expect("PART2_BODY fixture must end with the UU terminator");

        let mut collection = PartCollection::with_total(3);
        collection.add(make_entry(1, PART1_BODY)).unwrap();
        collection.add(make_entry(2, part2_without_end)).unwrap();
        collection.add(make_entry(3, PART3_BODY)).unwrap();

        let file = reassemble(&collection).unwrap();

        assert!(file.is_truncated, "body missing `end` must be truncated");
        assert!(
            file.missing_parts.is_empty(),
            "all parts were present; missing_parts must be empty"
        );
    }

    // ------------------------------------------------------------------
    // Decoder failures
    // ------------------------------------------------------------------

    /// A first part without a `begin` line makes the decoder fail, and that
    /// failure surfaces as `DecodeError`.
    #[test]
    fn decode_error_on_first_part_propagates() {
        let mut collection = PartCollection::with_total(1);
        collection
            .add(make_entry(1, b"this is not valid uu data\n"))
            .unwrap();

        let outcome = reassemble(&collection);

        assert!(matches!(outcome, Err(MultiUuError::DecodeError(_))));
    }

    /// A valid first part does not rescue the call when the second part is
    /// undecodable: the error is still `DecodeError`.
    #[test]
    fn decode_error_on_second_part_propagates() {
        let mut collection = PartCollection::with_total(2);
        collection.add(make_entry(1, PART1_BODY)).unwrap();
        collection.add(make_entry(2, b"not valid uu\n")).unwrap();

        let outcome = reassemble(&collection);

        assert!(matches!(outcome, Err(MultiUuError::DecodeError(_))));
    }
}