uuencoding_multi/reassemble.rs
1use crate::{MultiUuError, PartCollection};
2
/// A successfully reassembled multi-part UU-encoded file.
///
/// Returned by [`reassemble`]. The `data` field holds the decoded binary
/// payload; callers should inspect `is_truncated` before trusting the result.
///
/// The type implements `Clone`, `PartialEq` and `Eq` so results can be
/// duplicated and compared directly (for example with `assert_eq!`).
///
/// # Security note
///
/// `data` may contain a compressed archive. **This crate never decompresses
/// the output.** Apply independent size and resource limits before
/// decompressing to protect against decompression-bomb attacks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReassembledFile {
    /// Filename extracted from the `begin` line of the lowest-numbered part
    /// that was present (part 1 when the collection is complete).
    ///
    /// This is whatever filename appeared in the source message; it is not
    /// sanitised. Callers that write this to disk should validate it against
    /// path-traversal patterns.
    pub filename: String,
    /// Unix permission mode (e.g. `0o644`) from the `begin` line of the
    /// lowest-numbered part that was present. Later parts may specify
    /// different modes; their values are ignored.
    pub mode: u32,
    /// Decoded binary payload, formed by concatenating the decoded output of
    /// each present part in ascending `part_number` order.
    ///
    /// When `is_truncated` is `true` this buffer is incomplete: it contains
    /// only the bytes contributed by the parts that were present.
    pub data: Vec<u8>,
    /// `true` when one or more parts were absent from the collection, or when
    /// any individual part's UU body was reported as truncated by the decoder
    /// (e.g. missing its `end` line). The data is likely corrupt in this case.
    ///
    /// To distinguish the two truncation causes:
    /// - `is_truncated && !missing_parts.is_empty()` — gap in the collection.
    /// - `is_truncated && missing_parts.is_empty()` — all parts were present
    ///   but at least one part's UU body was itself truncated.
    pub is_truncated: bool,
    /// Part numbers in `1..=total` that were absent from the collection, in
    /// ascending order. Empty when the collection was complete.
    pub missing_parts: Vec<u32>,
}
44
45/// Reassemble a multi-part UU-encoded file from its parts.
46///
47/// Iterates over all [`PartEntry`][crate::PartEntry] values with
48/// `part_number >= 1` in ascending order (the [`PartCollection`]'s
49/// `BTreeMap` guarantees this). Each part's `body_bytes` is independently
50/// decoded via `uuencoding::decode` and the decoded payloads are concatenated.
51/// `filename` and `mode` are taken from the first part only.
52///
53/// The TOC part (`part_number = 0`), if present, is silently ignored.
54///
55/// # Errors
56///
57/// - [`MultiUuError::EmptyCollection`] — the collection contains no parts
58/// with `part_number >= 1` (including the case of a TOC-only collection).
59/// - [`MultiUuError::DecodeError`] — `uuencoding::decode` returned an error
60/// for one of the parts. Reassembly stops at the first failing part.
61///
62/// # Partial results
63///
64/// When parts are missing the function still returns `Ok` rather than an
65/// error. The result has `is_truncated = true` and `missing_parts` listing
66/// the absent part numbers. `data` contains the concatenation of the parts
67/// that *were* present, which is useful for diagnostic inspection.
68///
69/// # Never panics
70///
71/// This function never panics. The `expect` on the internal `get()` call
72/// is unreachable by construction: `present_parts()` only yields numbers
73/// that are keys in the underlying map.
74///
75/// # Security
76///
77/// The decoded `data` may be a compressed archive. Any subsequent
78/// decompression is the caller's responsibility and must be independently
79/// guarded against decompression-bomb attacks. This crate does not
80/// decompress.
81///
82/// # Example
83///
84/// ```no_run
85/// use uuencoding_multi::{PartCollection, PartEntry, reassemble};
86///
87/// // In practice `body_bytes` comes from message parts fetched from NNTP
88/// // or a mailbox; `no_run` is used here because constructing valid UU
89/// // bodies inline is verbose.
90/// let mut coll = PartCollection::with_total(2);
91/// coll.add(PartEntry { part_number: 1, body_bytes: todo!(), subject: None }).unwrap();
92/// coll.add(PartEntry { part_number: 2, body_bytes: todo!(), subject: None }).unwrap();
93///
94/// let file = reassemble(&coll).unwrap();
95/// assert!(!file.is_truncated);
96/// // Apply size limits before decompressing file.data.
97/// println!("{}: {} bytes", file.filename, file.data.len());
98/// ```
99pub fn reassemble(collection: &PartCollection) -> Result<ReassembledFile, MultiUuError> {
100 // Collect present part numbers >= 1, in ascending order (BTreeMap guarantees this).
101 let present: Vec<u32> = collection.present_parts().filter(|&n| n >= 1).collect();
102
103 if present.is_empty() {
104 return Err(MultiUuError::EmptyCollection);
105 }
106
107 let missing_parts = collection.missing_parts();
108
109 // Decode each present part individually and concatenate.
110 let mut all_data: Vec<u8> = Vec::new();
111 let mut any_truncated = false;
112 let mut filename = String::new();
113 let mut mode = 0u32;
114 let mut first = true;
115
116 for part_num in &present {
117 let entry = collection
118 .get(*part_num)
119 .expect("present_parts listed a part that get() cannot find");
120 let block = uuencoding::decode(&entry.body_bytes).map_err(MultiUuError::DecodeError)?;
121
122 if first {
123 filename = block.metadata.filename;
124 mode = block.metadata.mode;
125 first = false;
126 }
127
128 if block.is_truncated {
129 any_truncated = true;
130 }
131
132 all_data.extend_from_slice(&block.data);
133 }
134
135 let is_truncated = any_truncated || !missing_parts.is_empty();
136
137 Ok(ReassembledFile {
138 filename,
139 mode,
140 data: all_data,
141 is_truncated,
142 missing_parts,
143 })
144}
145
146// ---------------------------------------------------------------------------
147// Tests
148// ---------------------------------------------------------------------------
149
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{PartCollection, PartEntry};

    // ------------------------------------------------------------------
    // Oracle: Python 3.11 `uu` module, 2026-05-04
    //
    //   full    = b'Hello, World! This is a multi-part test.'  (40 bytes)
    //   p1_data = full[:14]   = b'Hello, World! '
    //   p2_data = full[14:27] = b'This is a mul'
    //   p3_data = full[27:]   = b'ti-part test.'
    //
    // Each part was encoded with uu.encode(pN_data, buf, 'file.bin', 0o644).
    //
    // A UU data line is one length character followed by 4 * ceil(n / 3)
    // encoded characters, where zero-valued sextets are emitted as spaces:
    //
    //   part | payload bytes | length char | encoded chars | trailing spaces
    //   -----+---------------+-------------+---------------+----------------
    //     1  |      14       |     '.'     |      20       |       2
    //     2  |      13       |     '-'     |      20       |       3
    //     3  |      13       |     '-'     |      20       |       2
    //
    // The padding spaces (and the zero-length line before `end`) are spelled
    // as `\x20` so they stay countable and cannot be collapsed or stripped by
    // editors, formatters or copy/paste. `fixtures_are_canonically_padded`
    // guards the line lengths.
    // ------------------------------------------------------------------

    const PART1_BODY: &[u8] =
        b"begin 644 file.bin\n.2&5L;&\\L(%=O<FQD(2\x20\x20\n\x20\nend\n";
    const PART2_BODY: &[u8] =
        b"begin 644 file.bin\n-5&AI<R!I<R!A(&UU;\x20\x20\x20\n\x20\nend\n";
    const PART3_BODY: &[u8] =
        b"begin 644 file.bin\n-=&DM<&%R=\"!T97-T+@\x20\x20\n\x20\nend\n";

    /// Build a `PartEntry` with the given number and body and no subject.
    fn make_entry(part_number: u32, body: &[u8]) -> PartEntry {
        PartEntry {
            part_number,
            body_bytes: body.to_vec(),
            subject: None,
        }
    }

    // ------------------------------------------------------------------
    // Fixture sanity
    // ------------------------------------------------------------------

    /// Every fixture's data line must carry exactly the number of encoded
    /// characters its length character declares, so the tests below do not
    /// depend on the decoder tolerating short lines.
    #[test]
    fn fixtures_are_canonically_padded() {
        for body in [PART1_BODY, PART2_BODY, PART3_BODY].iter() {
            // Line 0 is the `begin` header; line 1 is the single data line.
            let line = body.split(|&b| b == b'\n').nth(1).unwrap();
            let declared_len = usize::from(line[0] - b' ');
            let expected_chars = (declared_len + 2) / 3 * 4;
            assert_eq!(line.len() - 1, expected_chars);
        }
    }

    // ------------------------------------------------------------------
    // Single-part reassembly
    // ------------------------------------------------------------------

    /// Oracle: part 1 decodes to b'Hello, World! ' with name `file.bin` and
    /// mode 0o644.
    #[test]
    fn single_part_correct_data_and_metadata() {
        let mut c = PartCollection::with_total(1);
        c.add(make_entry(1, PART1_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! ");
        assert_eq!(result.filename, "file.bin");
        assert_eq!(result.mode, 0o644);
        assert!(!result.is_truncated);
        assert!(result.missing_parts.is_empty());
    }

    // ------------------------------------------------------------------
    // Three-part reassembly
    // ------------------------------------------------------------------

    /// Oracle: concatenated decoded bytes of all 3 parts = full 40-byte file.
    #[test]
    fn three_parts_full_reassembly() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, PART2_BODY)).unwrap();
        c.add(make_entry(3, PART3_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! This is a multi-part test.");
        assert_eq!(result.filename, "file.bin");
        assert_eq!(result.mode, 0o644);
        assert!(!result.is_truncated);
        assert!(result.missing_parts.is_empty());
    }

    /// Parts arrive out of order — reassembly must still concatenate them in
    /// ascending part-number order.
    #[test]
    fn three_parts_out_of_order_still_correct() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(3, PART3_BODY)).unwrap();
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, PART2_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert_eq!(result.data, b"Hello, World! This is a multi-part test.");
        assert!(!result.is_truncated);
    }

    // ------------------------------------------------------------------
    // Missing part — is_truncated + missing_parts populated
    // ------------------------------------------------------------------

    /// Missing part 2 of 3: is_truncated=true, missing_parts=[2],
    /// data = part1 decoded ++ part3 decoded (parts present: 1 and 3).
    #[test]
    fn missing_middle_part_yields_truncated() {
        let mut c = PartCollection::with_total(3);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        // part 2 deliberately omitted
        c.add(make_entry(3, PART3_BODY)).unwrap();

        let result = reassemble(&c).unwrap();
        assert!(result.is_truncated);
        assert_eq!(result.missing_parts, vec![2]);
        // Oracle: part1 decoded = b'Hello, World! ', part3 decoded = b'ti-part test.'
        assert_eq!(result.data, b"Hello, World! ti-part test.");
    }

    // ------------------------------------------------------------------
    // Empty collection
    // ------------------------------------------------------------------

    /// A collection with no parts at all is rejected.
    #[test]
    fn empty_collection_returns_error() {
        let c = PartCollection::new();
        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::EmptyCollection));
    }

    /// Collection with only a TOC part (part_number=0) has no data parts.
    #[test]
    fn toc_only_is_empty_collection() {
        let mut c = PartCollection::new();
        c.add(PartEntry {
            part_number: 0,
            body_bytes: b"toc data".to_vec(),
            subject: None,
        })
        .unwrap();
        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::EmptyCollection));
    }

    // ------------------------------------------------------------------
    // Decode errors
    // ------------------------------------------------------------------

    /// A corrupt body (no `begin` line) in the only part → DecodeError.
    #[test]
    fn decode_error_on_first_part_propagates() {
        let mut c = PartCollection::with_total(1);
        // The body has no "begin" line, so the decoder is expected to reject it.
        c.add(make_entry(1, b"this is not valid uu data\n"))
            .unwrap();

        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::DecodeError(_)));
    }

    /// A corrupt second part → DecodeError (reassembly stops at the first error).
    #[test]
    fn decode_error_on_second_part_propagates() {
        let mut c = PartCollection::with_total(2);
        c.add(make_entry(1, PART1_BODY)).unwrap();
        c.add(make_entry(2, b"not valid uu\n")).unwrap();

        let err = reassemble(&c).unwrap_err();
        assert!(matches!(err, MultiUuError::DecodeError(_)));
    }
}
308}