uuencoding_multi/reassemble.rs
1use crate::{MultiUuError, PartCollection};
2
/// A successfully reassembled multi-part UU-encoded file.
///
/// Returned by [`reassemble`]. The `data` field holds the decoded binary
/// payload; callers should inspect `is_truncated` before trusting the result.
///
/// The type is a plain value: it can be cloned and compared for equality,
/// which makes it convenient to cache results or assert on them in tests.
///
/// # Security note
///
/// `data` may contain a compressed archive. **This crate never decompresses
/// the output.** Apply independent size and resource limits before
/// decompressing to protect against decompression-bomb attacks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReassembledFile {
    /// Filename extracted from the `begin` line of the first *present* data
    /// part (the lowest `part_number >= 1` in the collection).
    ///
    /// **Security — path traversal**: the filename comes directly from the
    /// email or Usenet message and is not sanitised. Real-world UU archives
    /// have been observed with filenames containing `../` sequences. Sanitise
    /// this value before using it as a filesystem path to prevent directory
    /// traversal attacks (e.g. reject names containing `/`, `\`, or `..`
    /// components, and resolve the final path against an allowed base
    /// directory).
    pub filename: String,
    /// Unix permission mode (e.g. `0o644`) from the `begin` line of the first
    /// present data part. Subsequent parts may specify different modes; only
    /// the first one's value is used.
    pub mode: u32,
    /// Decoded binary payload.
    ///
    /// When [`is_truncated`][Self::is_truncated] is `false`, this is the
    /// complete decoded file content, formed by concatenating the decoded
    /// output of every part in ascending `part_number` order.
    ///
    /// # When `is_truncated` is `true`
    ///
    /// `data` contains **only the decoded bytes of the parts that were
    /// present**, concatenated in ascending part-number order. This is **not**
    /// a contiguous region of the reconstructed file: the bytes belonging to
    /// the absent parts are simply missing from the middle (or start, or end).
    /// The resulting byte sequence does **not** correspond to any valid file
    /// offset range.
    ///
    /// **Do not write truncated data to disk as if it were a complete file.**
    /// The bytes are provided for diagnostic inspection only (e.g. logging,
    /// partial-content display). To obtain a usable file, wait until
    /// [`is_complete()`][crate::PartCollection::is_complete] returns `true`
    /// before calling [`reassemble`].
    pub data: Vec<u8>,
    /// `true` when one or more parts were absent from the collection, or when
    /// any individual part's UU body was reported as truncated by the decoder
    /// (missing its `end` line). The data is likely corrupt in this case.
    ///
    /// To distinguish the two truncation causes:
    /// - `is_truncated && !missing_parts.is_empty()` — gap in the collection.
    /// - `is_truncated && missing_parts.is_empty()` — all parts were present
    ///   but at least one part's UU body was itself truncated (missing `end`).
    pub is_truncated: bool,
    /// Part numbers in `1..=total` that were absent from the collection, in
    /// ascending order. Empty when the collection was complete.
    pub missing_parts: Vec<u32>,
}
63
64/// Reassemble a multi-part UU-encoded file from its parts.
65///
66/// Iterates over all [`PartEntry`][crate::PartEntry] values with
67/// `part_number >= 1` in ascending order (the [`PartCollection`]'s
68/// `BTreeMap` guarantees this). Each part's `body_bytes` is independently
69/// decoded via `uuencoding::decode` and the decoded payloads are concatenated.
70/// `filename` and `mode` are taken from the first part only.
71///
72/// The TOC part (`part_number = 0`), if present, is silently ignored.
73///
74/// # Errors
75///
76/// - [`MultiUuError::EmptyCollection`] — the collection contains no parts
77/// with `part_number >= 1` (including the case of a TOC-only collection).
78/// - [`MultiUuError::DecodeError`] — `uuencoding::decode` returned an error
79/// for one of the parts. Reassembly stops at the first failing part.
80///
81/// # Partial results
82///
83/// When parts are missing the function still returns `Ok` rather than an
84/// error. The result has `is_truncated = true` and `missing_parts` listing
85/// the absent part numbers. `data` contains the decoded bytes of only the
86/// **present** parts concatenated in order.
87///
88/// **This is not a contiguous file region.** The bytes from the missing parts
89/// are absent, so the data does not correspond to a valid byte range within
90/// the original file. Do not write this to disk as a complete file. It is
91/// suitable for diagnostics only. Call
92/// [`PartCollection::is_complete()`][crate::PartCollection::is_complete]
93/// before reassembling if you require a usable result.
94///
95/// # Never panics
96///
97/// This function never panics. The `expect` on the internal `get()` call
98/// is unreachable by construction: `present_parts()` only yields numbers
99/// that are keys in the underlying map.
100///
101/// # Security
102///
103/// The decoded `data` may be a compressed archive. Any subsequent
104/// decompression is the caller's responsibility and must be independently
105/// guarded against decompression-bomb attacks. This crate does not
106/// decompress.
107///
108/// The `filename` field of the returned [`ReassembledFile`] comes from the
109/// email subject or UU `begin` line and is **not sanitised**. Sanitise it
110/// before using it as a filesystem path to prevent directory traversal
111/// attacks.
112///
113/// # Example
114///
115/// ```no_run
116/// use uuencoding_multi::{PartCollection, PartEntry, reassemble};
117///
118/// // In practice `body_bytes` comes from message parts fetched from NNTP
119/// // or a mailbox; `no_run` is used here because constructing valid UU
120/// // bodies inline is verbose.
121/// let mut coll = PartCollection::with_total(2);
122/// coll.add(PartEntry { part_number: 1, body_bytes: todo!(), subject: None }).unwrap();
123/// coll.add(PartEntry { part_number: 2, body_bytes: todo!(), subject: None }).unwrap();
124///
125/// let file = reassemble(&coll).unwrap();
126/// assert!(!file.is_truncated);
127/// // Apply size limits before decompressing file.data.
128/// println!("{}: {} bytes", file.filename, file.data.len());
129/// ```
130pub fn reassemble(collection: &PartCollection) -> Result<ReassembledFile, MultiUuError> {
131 // Collect present part numbers >= 1, in ascending order (BTreeMap guarantees this).
132 let present: Vec<u32> = collection.present_parts().filter(|&n| n >= 1).collect();
133
134 if present.is_empty() {
135 return Err(MultiUuError::EmptyCollection);
136 }
137
138 let missing_parts = collection.missing_parts();
139
140 // Decode each present part individually and concatenate.
141 let mut all_data: Vec<u8> = Vec::new();
142 let mut any_truncated = false;
143 let mut filename = String::new();
144 let mut mode = 0u32;
145 let mut first = true;
146
147 for part_num in &present {
148 let entry = collection
149 .get(*part_num)
150 .unwrap_or_else(|| unreachable!("present_parts listed a part that get() cannot find"));
151 let block = uuencoding::decode(&entry.body_bytes)?;
152
153 if first {
154 filename = block.metadata.filename;
155 mode = block.metadata.mode;
156 first = false;
157 }
158
159 if block.is_truncated {
160 any_truncated = true;
161 }
162
163 all_data.extend_from_slice(&block.data);
164 }
165
166 let is_truncated = any_truncated || !missing_parts.is_empty();
167
168 Ok(ReassembledFile {
169 filename,
170 mode,
171 data: all_data,
172 is_truncated,
173 missing_parts,
174 })
175}
176
177// ---------------------------------------------------------------------------
178// Tests
179// ---------------------------------------------------------------------------
180
#[cfg(test)]
mod tests {
    //! Unit tests for [`reassemble`], driven by fixtures produced with
    //! Python's `uu` module.

    use super::*;
    use crate::{PartCollection, PartEntry};

    // ------------------------------------------------------------------
    // Oracle: Python 3.11 `uu` module, 2026-05-04
    //
    // full    = b'Hello, World! This is a multi-part test.' (40 bytes)
    // p1_data = full[:14]   = b'Hello, World! '
    // p2_data = full[14:27] = b'This is a mul'
    // p3_data = full[27:]   = b'ti-part test.'
    //
    // Each part is encoded with uu.encode(<data>, buf, 'file.bin', 0o644),
    // which yields: a `begin 644 file.bin` line, one data line, an empty
    // line consisting of a single space, and `end`.
    //
    // A data line is the length character followed by 4 characters for
    // every 3 payload bytes; the final group is zero-padded, and a zero
    // sextet encodes as a SPACE. The data lines therefore END IN SPACES
    // (2, 3 and 2 of them respectively). Those spaces are significant, so
    // they are written as `\x20` below: editors and formatters that trim
    // or collapse whitespace cannot silently shorten the fixtures.
    // ------------------------------------------------------------------

    /// `b'Hello, World! '` (14 bytes) — data line ends in two padding spaces.
    const PART1_BODY: &[u8] =
        b"begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2\x20\x20\n\x20\nend\n";
    /// `b'This is a mul'` (13 bytes) — data line ends in three padding spaces.
    const PART2_BODY: &[u8] =
        b"begin 644 file.bin\n-5&AI<R!I<R!A(&UU;\x20\x20\x20\n\x20\nend\n";
    /// `b'ti-part test.'` (13 bytes) — data line ends in two padding spaces.
    const PART3_BODY: &[u8] =
        b"begin 644 file.bin\n-=&DM<&%R=\"!T97-T+@\x20\x20\n\x20\nend\n";

    /// Build a subject-less [`PartEntry`] owning a copy of `body`.
    fn make_entry(part_number: u32, body: &[u8]) -> PartEntry {
        PartEntry {
            part_number,
            body_bytes: body.to_vec(),
            subject: None,
        }
    }

    // ------------------------------------------------------------------
    // Fixture self-check
    // ------------------------------------------------------------------

    /// Guards the fixtures themselves: a UU data line must hold the length
    /// character plus `4 * ceil(n / 3)` encoded characters, where `n` is the
    /// payload byte count stored in the length character. This fails if the
    /// trailing padding spaces are ever lost.
    #[test]
    fn fixtures_have_full_length_data_lines() {
        for body in &[PART1_BODY, PART2_BODY, PART3_BODY] {
            let data_line = body
                .split(|&b| b == b'\n')
                .nth(1)
                .expect("fixture has a data line after `begin`");
            let payload_len = usize::from(data_line[0] - b' ');
            assert_eq!(data_line.len(), 1 + 4 * ((payload_len + 2) / 3));
        }
    }

    // ------------------------------------------------------------------
    // Single-part reassembly
    // ------------------------------------------------------------------

    /// Oracle: `PART1_BODY` decodes to `b'Hello, World! '` with filename
    /// `file.bin` and mode `0o644`.
    #[test]
    fn single_part_correct_data_and_metadata() {
        let mut c = PartCollection::with_total(1);
        c.add(make_entry(1, PART1_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! ");
        assert_eq!(result.filename, "file.bin");
        assert_eq!(result.mode, 0o644);
        assert!(!result.is_truncated);
        assert!(result.missing_parts.is_empty());
    }

    // ------------------------------------------------------------------
    // Three-part reassembly
    // ------------------------------------------------------------------

    /// Oracle: concatenated decoded bytes of all 3 parts = full 40-byte file.
    #[test]
    fn three_parts_full_reassembly() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, PART2_BODY)).unwrap();
        c.add(make_entry(3, PART3_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! This is a multi-part test.");
        assert_eq!(result.filename, "file.bin");
        assert_eq!(result.mode, 0o644);
        assert!(!result.is_truncated);
        assert!(result.missing_parts.is_empty());
    }

    /// Parts arrive out of order — reassembly must still emit them in
    /// ascending part-number order.
    #[test]
    fn three_parts_out_of_order_still_correct() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(3, PART3_BODY)).unwrap();
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, PART2_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! This is a multi-part test.");
        assert!(!result.is_truncated);
    }

    // ------------------------------------------------------------------
    // Missing part — is_truncated + missing_parts populated
    // ------------------------------------------------------------------

    /// Missing part 2 of 3: is_truncated=true, missing_parts=[2],
    /// data = part1 decoded ++ part3 decoded (parts present: 1 and 3).
    #[test]
    fn missing_middle_part_yields_truncated() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        // part 2 deliberately omitted
        c.add(make_entry(3, PART3_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert!(result.is_truncated);
        assert_eq!(result.missing_parts, vec![2]);
        // Oracle: part1 decoded = b'Hello, World! ', part3 decoded = b'ti-part test.'
        assert_eq!(result.data, b"Hello, World! ti-part test.");
    }

    // ------------------------------------------------------------------
    // Empty collection
    // ------------------------------------------------------------------

    /// A collection with no parts at all is rejected.
    #[test]
    fn empty_collection_returns_error() {
        let c = PartCollection::new();
        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::EmptyCollection));
    }

    /// Collection with only a TOC part (part_number=0) has no data parts.
    #[test]
    fn toc_only_is_empty_collection() {
        let mut c = PartCollection::new();
        c.add(PartEntry {
            part_number: 0,
            body_bytes: b"toc data".to_vec(),
            subject: None,
        })
        .unwrap();
        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::EmptyCollection));
    }

    // ------------------------------------------------------------------
    // Truncated UU body — all parts present but missing `end` terminator
    // ------------------------------------------------------------------

    /// All parts are present (no gap) but part 2's body has its ` \nend\n`
    /// terminator stripped, so the decoder is expected to report
    /// is_truncated=true. The result must have is_truncated=true and
    /// missing_parts empty.
    #[test]
    fn truncated_uu_body_with_all_parts_present() {
        // PART2_BODY ends with: <data line> + "\x20\n" + "end\n".
        // Strip the last 6 bytes ("\x20\nend\n") to remove the terminator.
        let truncated_part2: Vec<u8> = PART2_BODY[..PART2_BODY.len() - 6].to_vec();

        let mut c = PartCollection::with_total(3);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(PartEntry {
            part_number: 2,
            body_bytes: truncated_part2,
            subject: None,
        })
        .unwrap();
        c.add(make_entry(3, PART3_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert!(result.is_truncated, "body missing `end` must be truncated");
        assert!(
            result.missing_parts.is_empty(),
            "all parts were present; missing_parts must be empty"
        );
    }

    // ------------------------------------------------------------------
    // Decode errors
    // ------------------------------------------------------------------

    /// A corrupt body (no `begin` line) in the only part → DecodeError.
    #[test]
    fn decode_error_on_first_part_propagates() {
        let mut c = PartCollection::with_total(1);
        // Body has no "begin" line, so decoding is expected to fail.
        c.add(make_entry(1, b"this is not valid uu data\n"))
            .unwrap();

        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::DecodeError(_)));
    }

    /// A corrupt second part → DecodeError (stops at first error).
    #[test]
    fn decode_error_on_second_part_propagates() {
        let mut c = PartCollection::with_total(2);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, b"not valid uu\n")).unwrap();

        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::DecodeError(_)));
    }
}
370}