Skip to main content

uuencoding_multi/
lib.rs

1//! Multi-part UUencoded Usenet/email post reassembly.
2//!
3//! # Background
4//!
5//! Before MIME attachments became universal, large binary files were shared on
6//! Usenet and via email by UUencoding them and splitting the result across
7//! multiple posts or messages. Each post contained a sequential segment of the
8//! encoded data, identified by a subject-line marker such as `[2/7]` or
9//! `(2 of 7)`. Readers would collect all parts and concatenate the UU bodies
10//! before decoding.
11//!
12//! Many multi-part series began with a part 0 (the TOC post) that listed
13//! the files being distributed along with their sizes and which parts each file
14//! spanned. This crate handles both the TOC and the data parts.
15//!
16//! # What this crate provides
17//!
18//! - [`parse_subject`] — extract part index, part total, and base subject from
19//!   a Usenet/email subject line. Recognises five common marker formats:
20//!   `(N/M)`, `[N/M]`, `Part N/M`, `Part N of M`, and `- N/M`.
21//! - [`PartCollection`] — accumulate [`PartEntry`] values keyed by part number
22//!   until all parts are present, with gap detection and duplicate rejection.
23//! - [`reassemble()`] — validate completeness, concatenate raw UU bodies in
24//!   ascending part order, and decode via the `uuencoding` crate.
25//! - [`parse_toc`] — best-effort parse of a TOC body (part 0), returning a
26//!   [`ParsedToc`] with [`TocEntry`] records for each file listed.
27//!
28//! # What this crate does NOT do
29//!
30//! - **MIME parsing**: this crate operates on raw message body bytes that the
31//!   caller has already extracted from the MIME structure. Use the `mime-tree`
32//!   crate (or equivalent) to parse the enclosing MIME message and locate the
33//!   plain-text body part before passing bytes here.
34//! - **Message fetching or storage**: retrieving articles from an NNTP server,
35//!   reading mailbox files, or persisting collected parts is entirely the
36//!   caller's responsibility.
37//! - **yEnc decoding**: subject lines that contain a `yEnc` marker are
38//!   explicitly rejected by [`parse_subject`] (returns `None`). yEnc is a
39//!   distinct binary encoding with its own tools.
40//!
41//! # Integration with `mime-tree`
42//!
43//! The expected integration pattern is:
44//! 1. Parse the raw RFC 5322 message bytes with `mime-tree` to obtain the
45//!    `Subject` header value and the plain-text body.
46//! 2. Pass the `Subject` string to [`parse_subject`] to identify the part
47//!    number and group key.
48//! 3. Wrap the body bytes in a [`PartEntry`] and insert it into the
49//!    [`PartCollection`] for that base subject (one collection per series).
50//! 4. Once the collection is complete, call [`reassemble()`].
51//!
52//! # Security
53//!
54//! The `data` field of [`ReassembledFile`] is raw decoded bytes that may
55//! represent a compressed archive (`.tar.gz`, `.zip`, `.rar`, etc.). **This
56//! crate never decompresses the output.** Callers that subsequently decompress
57//! the data must put their own size and resource limits in place before
58//! decompression begins, to defend against decompression-bomb attacks.
59//!
60//! # End-to-end usage example
61//!
62//! ```no_run
63//! use uuencoding_multi::{
64//!     parse_subject, PartCollection, PartEntry, reassemble,
65//! };
66//!
67//! // Imagine these come from an NNTP server or mailbox.
68//! let raw_messages: Vec<(String, Vec<u8>)> = todo!("fetch messages");
69//!
70//! let mut collections: std::collections::HashMap<String, PartCollection> =
71//!     std::collections::HashMap::new();
72//!
73//! for (subject, body_bytes) in raw_messages {
74//!     // Step 1: parse the subject to identify part number and grouping key.
75//!     let Some(sp) = parse_subject(&subject) else {
76//!         continue; // empty or yEnc subject — skip
77//!     };
78//!     let Some(part_index) = sp.part_index else {
79//!         continue; // no part marker — treat as a plain message
80//!     };
81//!
82//!     // Step 2: accumulate parts by base subject.
83//!     let coll = collections.entry(sp.base_subject).or_default();
84//!     if let Some(total) = sp.part_total {
85//!         if coll.total().is_none() {
86//!             *coll = PartCollection::with_total(total);
87//!         }
88//!     }
89//!     let entry = PartEntry { part_number: part_index, body_bytes, subject: Some(subject) };
90//!     let _ = coll.add(entry); // deliberately discard the result, e.g. a rejected duplicate
91//! }
92//!
93//! // Step 3: reassemble complete collections.
94//! for (key, coll) in &collections {
95//!     if !coll.is_complete() {
96//!         eprintln!("{key}: still waiting for {:?}", coll.missing_parts());
97//!         continue;
98//!     }
99//!     let file = reassemble(coll).expect("complete collection should decode");
100//!     // IMPORTANT: apply size/resource limits before decompressing `file.data`.
101//!     println!("decoded {} ({} bytes, mode {:o})", file.filename, file.data.len(), file.mode);
102//! }
103//! ```
104
105mod collection;
106mod error;
107mod reassemble;
108mod subject;
109mod toc;
110
111pub use collection::{PartCollection, PartEntry};
112pub use error::MultiUuError;
113pub use reassemble::{reassemble, ReassembledFile};
114pub use subject::{parse_subject, SubjectParts};
115pub use toc::{parse_toc, ParsedToc, TocEntry};