Skip to main content

uuencoding_multi/
reassemble.rs

1use crate::{MultiUuError, PartCollection};
2
/// The outcome of reassembling a multi-part UU-encoded file.
///
/// Produced by [`reassemble`]. `data` carries the decoded binary payload;
/// check `is_truncated` (and `missing_parts`) before trusting it.
///
/// The type is `Clone` and `Eq` so results can be stored, duplicated and
/// compared directly (for example in tests or de-duplication logic).
///
/// # Security note
///
/// `data` may contain a compressed archive. **This crate never decompresses
/// the output.** Apply independent size and resource limits before
/// decompressing to protect against decompression-bomb attacks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReassembledFile {
    /// Filename taken from the `begin` line of the first present data part.
    ///
    /// This is whatever filename appeared in the source message; it is not
    /// sanitised. Callers that write this to disk should validate it against
    /// path-traversal patterns.
    pub filename: String,
    /// Unix permission mode (e.g. `0o644`) taken from the `begin` line of the
    /// first present data part. Later parts may carry different modes; those
    /// values are ignored.
    pub mode: u32,
    /// Decoded binary payload, formed by concatenating the decoded output of
    /// each present part in ascending `part_number` order.
    ///
    /// When `is_truncated` is `true` this buffer is incomplete: it contains
    /// only the bytes contributed by the parts that were present.
    pub data: Vec<u8>,
    /// `true` when one or more parts were absent from the collection, or when
    /// the decoder flagged any individual part as truncated (its UU body was
    /// missing the `end` line). The data is likely corrupt in this case.
    pub is_truncated: bool,
    /// Part numbers in `1..=total` that were absent from the collection, in
    /// ascending order. Empty when the collection was complete.
    pub missing_parts: Vec<u32>,
}
39
40/// Reassemble a multi-part UU-encoded file from its parts.
41///
42/// Iterates over all [`PartEntry`][crate::PartEntry] values with
43/// `part_number >= 1` in ascending order (the [`PartCollection`]'s
44/// `BTreeMap` guarantees this). Each part's `body_bytes` is independently
45/// decoded via `uuencoding::decode` and the decoded payloads are concatenated.
46/// `filename` and `mode` are taken from the first part only.
47///
48/// The TOC part (`part_number = 0`), if present, is silently ignored.
49///
50/// # Errors
51///
52/// - [`MultiUuError::EmptyCollection`] — the collection contains no parts
53///   with `part_number >= 1` (including the case of a TOC-only collection).
54/// - [`MultiUuError::DecodeError`] — `uuencoding::decode` returned an error
55///   for one of the parts. Reassembly stops at the first failing part.
56///
57/// # Partial results
58///
59/// When parts are missing the function still returns `Ok` rather than an
60/// error. The result has `is_truncated = true` and `missing_parts` listing
61/// the absent part numbers. `data` contains the concatenation of the parts
62/// that *were* present, which is useful for diagnostic inspection.
63///
64/// # Never panics
65///
66/// This function never panics. The `expect` on the internal `get()` call
67/// is unreachable by construction: `present_parts()` only yields numbers
68/// that are keys in the underlying map.
69///
70/// # Security
71///
72/// The decoded `data` may be a compressed archive. Any subsequent
73/// decompression is the caller's responsibility and must be independently
74/// guarded against decompression-bomb attacks. This crate does not
75/// decompress.
76///
77/// # Example
78///
79/// ```no_run
80/// use uuencoding_multi::{PartCollection, PartEntry, reassemble};
81///
82/// // In practice `body_bytes` comes from message parts fetched from NNTP
83/// // or a mailbox; `no_run` is used here because constructing valid UU
84/// // bodies inline is verbose.
85/// let mut coll = PartCollection::with_total(2);
86/// coll.add(PartEntry { part_number: 1, body_bytes: todo!(), subject: None }).unwrap();
87/// coll.add(PartEntry { part_number: 2, body_bytes: todo!(), subject: None }).unwrap();
88///
89/// let file = reassemble(&coll).unwrap();
90/// assert!(!file.is_truncated);
91/// // Apply size limits before decompressing file.data.
92/// println!("{}: {} bytes", file.filename, file.data.len());
93/// ```
94pub fn reassemble(collection: &PartCollection) -> Result<ReassembledFile, MultiUuError> {
95    // Collect present part numbers >= 1, in ascending order (BTreeMap guarantees this).
96    let present: Vec<u32> = collection.present_parts().filter(|&n| n >= 1).collect();
97
98    if present.is_empty() {
99        return Err(MultiUuError::EmptyCollection);
100    }
101
102    let missing_parts = collection.missing_parts();
103
104    // Decode each present part individually and concatenate.
105    let mut all_data: Vec<u8> = Vec::new();
106    let mut any_truncated = false;
107    let mut filename = String::new();
108    let mut mode = 0u32;
109    let mut first = true;
110
111    for part_num in &present {
112        let entry = collection
113            .get(*part_num)
114            .expect("present_parts listed a part that get() cannot find");
115        let block = uuencoding::decode(&entry.body_bytes).map_err(MultiUuError::DecodeError)?;
116
117        if first {
118            filename = block.metadata.filename;
119            mode = block.metadata.mode;
120            first = false;
121        }
122
123        if block.is_truncated {
124            any_truncated = true;
125        }
126
127        all_data.extend_from_slice(&block.data);
128    }
129
130    let is_truncated = any_truncated || !missing_parts.is_empty();
131
132    Ok(ReassembledFile {
133        filename,
134        mode,
135        data: all_data,
136        is_truncated,
137        missing_parts,
138    })
139}
140
141// ---------------------------------------------------------------------------
142// Tests
143// ---------------------------------------------------------------------------
144
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{PartCollection, PartEntry};

    // ------------------------------------------------------------------
    // Fixtures
    //
    // Oracle: Python 3.11 `uu` module, 2026-05-04
    //
    //   full = b'Hello, World! This is a multi-part test.'  (40 bytes)
    //   p1_data = full[:14]   = b'Hello, World! '
    //   p2_data = full[14:27] = b'This is a mul'
    //   p3_data = full[27:]   = b'ti-part test.'
    //
    // Each slice was encoded separately as 'file.bin' with mode 0o644:
    //   p1 => b'begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2  \n \nend\n'
    //   p2 => b'begin 644 file.bin\n-5&AI<R!I<R!A(&UU;   \n \nend\n'
    //   p3 => b'begin 644 file.bin\n-=&DM<&%R="!T97-T+@  \n \nend\n'
    // ------------------------------------------------------------------

    const PART1_BODY: &[u8] = b"begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2  \n \nend\n";
    const PART2_BODY: &[u8] = b"begin 644 file.bin\n-5&AI<R!I<R!A(&UU;   \n \nend\n";
    const PART3_BODY: &[u8] = b"begin 644 file.bin\n-=&DM<&%R=\"!T97-T+@  \n \nend\n";

    /// Build a subject-less entry from `body` and insert it into
    /// `collection`, panicking if the collection rejects it.
    fn add_part(collection: &mut PartCollection, part_number: u32, body: &[u8]) {
        let entry = PartEntry {
            part_number,
            body_bytes: body.to_vec(),
            subject: None,
        };
        collection.add(entry).unwrap();
    }

    // ------------------------------------------------------------------
    // Single-part reassembly
    // ------------------------------------------------------------------

    /// A one-part collection decodes to exactly that part's payload and
    /// carries the metadata from its `begin` line.
    /// Oracle: PART1_BODY decodes to b'Hello, World! '.
    #[test]
    fn single_part_correct_data_and_metadata() {
        let mut parts = PartCollection::with_total(1);
        add_part(&mut parts, 1, PART1_BODY);

        let ReassembledFile {
            filename,
            mode,
            data,
            is_truncated,
            missing_parts,
        } = reassemble(&parts).unwrap();

        assert_eq!(data, b"Hello, World! ");
        assert_eq!(filename, "file.bin");
        assert_eq!(mode, 0o644);
        assert!(!is_truncated);
        assert!(missing_parts.is_empty());
    }

    // ------------------------------------------------------------------
    // Three-part reassembly
    // ------------------------------------------------------------------

    /// Oracle: the decoded bytes of all three parts, joined, are the full
    /// 40-byte file.
    #[test]
    fn three_parts_full_reassembly() {
        let mut parts = PartCollection::with_total(3);
        add_part(&mut parts, 1, PART1_BODY);
        add_part(&mut parts, 2, PART2_BODY);
        add_part(&mut parts, 3, PART3_BODY);

        let ReassembledFile {
            filename,
            mode,
            data,
            is_truncated,
            missing_parts,
        } = reassemble(&parts).unwrap();

        assert_eq!(data, b"Hello, World! This is a multi-part test.");
        assert_eq!(filename, "file.bin");
        assert_eq!(mode, 0o644);
        assert!(!is_truncated);
        assert!(missing_parts.is_empty());
    }

    /// Insertion order must not matter: parts are joined by ascending
    /// part number regardless of the order they were added in.
    #[test]
    fn three_parts_out_of_order_still_correct() {
        let mut parts = PartCollection::with_total(3);
        add_part(&mut parts, 3, PART3_BODY);
        add_part(&mut parts, 1, PART1_BODY);
        add_part(&mut parts, 2, PART2_BODY);

        let ReassembledFile {
            data, is_truncated, ..
        } = reassemble(&parts).unwrap();

        assert_eq!(data, b"Hello, World! This is a multi-part test.");
        assert!(!is_truncated);
    }

    // ------------------------------------------------------------------
    // Missing part — is_truncated + missing_parts populated
    // ------------------------------------------------------------------

    /// With part 2 of 3 absent the call still succeeds, reporting
    /// is_truncated=true and missing_parts=[2]; data is the decoded
    /// part 1 followed directly by the decoded part 3.
    #[test]
    fn missing_middle_part_yields_truncated() {
        let mut parts = PartCollection::with_total(3);
        add_part(&mut parts, 1, PART1_BODY);
        // Part 2 is intentionally never added.
        add_part(&mut parts, 3, PART3_BODY);

        let ReassembledFile {
            data,
            is_truncated,
            missing_parts,
            ..
        } = reassemble(&parts).unwrap();

        assert!(is_truncated);
        assert_eq!(missing_parts, vec![2]);
        // Oracle: part1 decoded = b'Hello, World! ', part3 decoded = b'ti-part test.'
        assert_eq!(data, b"Hello, World! ti-part test.");
    }

    // ------------------------------------------------------------------
    // Empty collection
    // ------------------------------------------------------------------

    /// A collection with no entries at all is rejected.
    #[test]
    fn empty_collection_returns_error() {
        let parts = PartCollection::new();

        assert!(matches!(
            reassemble(&parts),
            Err(MultiUuError::EmptyCollection)
        ));
    }

    /// A collection holding only the TOC part (part_number=0) has no data
    /// parts and is treated the same as an empty one.
    #[test]
    fn toc_only_is_empty_collection() {
        let mut parts = PartCollection::new();
        add_part(&mut parts, 0, b"toc data");

        assert!(matches!(
            reassemble(&parts),
            Err(MultiUuError::EmptyCollection)
        ));
    }

    // ------------------------------------------------------------------
    // Decode errors
    // ------------------------------------------------------------------

    /// A body without a "begin" line cannot be decoded; the decoder's
    /// failure surfaces as DecodeError.
    #[test]
    fn decode_error_on_first_part_propagates() {
        let mut parts = PartCollection::with_total(1);
        add_part(&mut parts, 1, b"this is not valid uu data\n");

        assert!(matches!(
            reassemble(&parts),
            Err(MultiUuError::DecodeError(_))
        ));
    }

    /// A valid first part followed by a corrupt second part still fails:
    /// reassembly stops at the first part that does not decode.
    #[test]
    fn decode_error_on_second_part_propagates() {
        let mut parts = PartCollection::with_total(2);
        add_part(&mut parts, 1, PART1_BODY);
        add_part(&mut parts, 2, b"not valid uu\n");

        assert!(matches!(
            reassemble(&parts),
            Err(MultiUuError::DecodeError(_))
        ));
    }
}