// rust_par2/packets.rs

//! PAR2 binary packet parser.
//!
//! Parses PAR2 files according to the PAR 2.0 specification:
//! <http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html>
//!
//! Packet layout (all multi-byte fields are little-endian):
//! ```text
//! Offset  Size  Description
//!   0       8   Magic: "PAR2\x00PKT"
//!   8       8   Packet length (u64, includes header, must be multiple of 4)
//!  16      16   MD5 hash of bytes 32..packet_end
//!  32      16   Recovery Set ID
//!  48      16   Packet Type
//!  64       ?   Body (packet-type specific)
//! ```
16
17use std::collections::HashMap;
18use std::io::{self, Read, Seek, SeekFrom};
19use std::path::Path;
20
21use md5::{Digest, Md5};
22use tracing::{debug, trace, warn};
23
24use crate::types::{Id16, Md5Hash, Par2File, Par2FileSet, SliceChecksum};
25
/// Size in bytes of the fixed packet header (magic, length, MD5, set ID, type).
pub const HEADER_SIZE: usize = 64;

/// Eight-byte marker that opens every PAR2 packet.
const PAR2_MAGIC: &[u8; 8] = b"PAR2\0PKT";

/// Packet magic, re-exported for use by the recovery module.
pub const MAGIC: &[u8; 8] = PAR2_MAGIC;

/// Smallest legal value of the packet length field: a header with an empty body.
const MIN_PACKET_LEN: u64 = HEADER_SIZE as u64;

// Sixteen-byte packet type tags, compared against header bytes 48..64.

/// Type tag of the Main packet.
const TYPE_MAIN: &[u8; 16] = b"PAR 2.0\0Main\0\0\0\0";
/// Type tag of a File Description packet.
const TYPE_FILE_DESC: &[u8; 16] = b"PAR 2.0\0FileDesc";
/// Type tag of an Input File Slice Checksum packet.
const TYPE_IFSC: &[u8; 16] = b"PAR 2.0\0IFSC\0\0\0\0";
/// Type tag of a Recovery Slice packet.
const TYPE_RECOVERY: &[u8; 16] = b"PAR 2.0\0RecvSlic";
/// Type tag of the Creator packet.
const TYPE_CREATOR: &[u8; 16] = b"PAR 2.0\0Creator\0";
42
43/// Errors that can occur while parsing PAR2 files.
44#[derive(Debug, thiserror::Error)]
45pub enum ParseError {
46    #[error("I/O error: {0}")]
47    Io(#[from] io::Error),
48    #[error("no PAR2 packets found in file")]
49    NoPar2Packets,
50    #[error("missing Main packet — cannot determine slice size")]
51    NoMainPacket,
52}
53
54/// Intermediate storage during parsing (packets can arrive in any order).
55struct ParseState {
56    recovery_set_id: Option<Id16>,
57    slice_size: Option<u64>,
58    nr_files: Option<u32>,
59    /// FileDesc data keyed by File ID.
60    file_descs: HashMap<Id16, FileDescData>,
61    /// IFSC (slice checksum) data keyed by File ID.
62    ifsc_data: HashMap<Id16, Vec<SliceChecksum>>,
63    /// Recovery slice count.
64    recovery_count: u32,
65    /// Creator string.
66    creator: Option<String>,
67}
68
69struct FileDescData {
70    hash: Md5Hash,
71    hash_16k: Md5Hash,
72    size: u64,
73    filename: String,
74}
75
76/// Parse a PAR2 file and return the complete file set metadata.
77///
78/// This reads the entire PAR2 file (typically the index `.par2` file, not the
79/// large `.volNNN+NNN.par2` recovery volumes). For recovery volumes, only the
80/// header packets are read — the large recovery data is skipped.
81pub fn parse_par2_file(path: &Path) -> Result<Par2FileSet, ParseError> {
82    let file = std::fs::File::open(path)?;
83    let file_size = file.metadata()?.len();
84    let mut reader = io::BufReader::new(file);
85
86    parse_par2_reader(&mut reader, file_size)
87}
88
89/// Parse PAR2 packets from any `Read + Seek` source.
90pub fn parse_par2_reader<R: Read + Seek>(
91    reader: &mut R,
92    file_size: u64,
93) -> Result<Par2FileSet, ParseError> {
94    let mut state = ParseState {
95        recovery_set_id: None,
96        slice_size: None,
97        nr_files: None,
98        file_descs: HashMap::new(),
99        ifsc_data: HashMap::new(),
100        recovery_count: 0,
101        creator: None,
102    };
103
104    let mut magic_buf = [0u8; 8];
105    let mut packets_parsed = 0u32;
106
107    loop {
108        let pos = reader.stream_position()?;
109        if pos >= file_size {
110            break;
111        }
112
113        // Read magic
114        if reader.read_exact(&mut magic_buf).is_err() {
115            break;
116        }
117
118        if magic_buf != *PAR2_MAGIC {
119            // Not at a packet boundary — try to find the next one.
120            // This handles trailing garbage or alignment issues.
121            if let Some(next_pos) = scan_for_magic(reader, file_size)? {
122                reader.seek(SeekFrom::Start(next_pos))?;
123                continue;
124            }
125            break;
126        }
127
128        // Read packet length
129        let mut len_buf = [0u8; 8];
130        if reader.read_exact(&mut len_buf).is_err() {
131            break;
132        }
133        let packet_len = u64::from_le_bytes(len_buf);
134
135        // Validate length
136        if packet_len < MIN_PACKET_LEN || packet_len % 4 != 0 {
137            warn!(packet_len, pos, "invalid PAR2 packet length, skipping");
138            continue;
139        }
140
141        // Don't read absurdly large packets into memory (recovery slices
142        // can be many megabytes). We only need the type to count them.
143        let body_len = packet_len - 16; // everything after magic + length + md5
144        if body_len > 10 * 1024 * 1024 {
145            // Large packet — likely a recovery slice. Read just the type.
146            let mut md5_buf = [0u8; 16];
147            reader.read_exact(&mut md5_buf)?;
148
149            let mut type_header = [0u8; 32]; // recovery_set_id + type
150            reader.read_exact(&mut type_header)?;
151            let packet_type = &type_header[16..32];
152
153            if packet_type == TYPE_RECOVERY {
154                state.recovery_count += 1;
155                if state.recovery_set_id.is_none() {
156                    let mut id = [0u8; 16];
157                    id.copy_from_slice(&type_header[..16]);
158                    state.recovery_set_id = Some(id);
159                }
160            }
161
162            // Skip the rest
163            let remaining = packet_len - 64;
164            reader.seek(SeekFrom::Current(remaining as i64))?;
165            packets_parsed += 1;
166            continue;
167        }
168
169        // Read MD5 hash of packet body
170        let mut stored_md5 = [0u8; 16];
171        reader.read_exact(&mut stored_md5)?;
172
173        // Read the rest of the packet (recovery_set_id + type + body)
174        let data_len = (packet_len - 32) as usize;
175        let mut data = vec![0u8; data_len];
176        if reader.read_exact(&mut data).is_err() {
177            break;
178        }
179
180        // Verify packet MD5
181        let computed_md5: [u8; 16] = Md5::digest(&data).into();
182        if computed_md5 != stored_md5 {
183            warn!(pos, "PAR2 packet MD5 mismatch, skipping");
184            continue;
185        }
186
187        // Extract recovery set ID and packet type
188        let mut set_id = [0u8; 16];
189        set_id.copy_from_slice(&data[..16]);
190        if state.recovery_set_id.is_none() {
191            state.recovery_set_id = Some(set_id);
192        }
193
194        let packet_type = &data[16..32];
195
196        // Dispatch by type
197        if packet_type == TYPE_FILE_DESC {
198            parse_file_desc(&data, &mut state);
199        } else if packet_type == TYPE_IFSC {
200            parse_ifsc(&data, packet_len, &mut state);
201        } else if packet_type == TYPE_MAIN {
202            parse_main(&data, &mut state);
203        } else if packet_type == TYPE_RECOVERY {
204            state.recovery_count += 1;
205        } else if packet_type == TYPE_CREATOR {
206            parse_creator(&data, &mut state);
207        }
208
209        packets_parsed += 1;
210
211        // Early exit optimisation: once we have all file descs and IFSCs, we
212        // can stop (avoids reading huge recovery volumes in concatenated files).
213        if let Some(nr) = state.nr_files {
214            if state.file_descs.len() == nr as usize
215                && state.ifsc_data.len() == nr as usize
216                && state.slice_size.is_some()
217            {
218                // If the file is large, stop early like SABnzbd does.
219                if file_size > 10 * 1024 * 1024 {
220                    debug!(
221                        packets_parsed,
222                        "parsed all file metadata, stopping early on large file"
223                    );
224                    break;
225                }
226            }
227        }
228    }
229
230    if packets_parsed == 0 {
231        return Err(ParseError::NoPar2Packets);
232    }
233
234    let slice_size = state.slice_size.ok_or(ParseError::NoMainPacket)?;
235    let recovery_set_id = state.recovery_set_id.unwrap_or([0u8; 16]);
236
237    // Assemble Par2File entries by joining FileDesc + IFSC data on File ID
238    let mut files = HashMap::new();
239    for (file_id, desc) in state.file_descs {
240        let slices = state.ifsc_data.remove(&file_id).unwrap_or_default();
241        files.insert(
242            file_id,
243            Par2File {
244                file_id,
245                hash: desc.hash,
246                hash_16k: desc.hash_16k,
247                size: desc.size,
248                filename: desc.filename,
249                slices,
250            },
251        );
252    }
253
254    debug!(
255        files = files.len(),
256        recovery_blocks = state.recovery_count,
257        slice_size,
258        creator = state.creator.as_deref().unwrap_or("unknown"),
259        "PAR2 file parsed"
260    );
261
262    Ok(Par2FileSet {
263        recovery_set_id,
264        slice_size,
265        files,
266        recovery_block_count: state.recovery_count,
267        creator: state.creator,
268    })
269}
270
271// ---------------------------------------------------------------------------
272// Packet body parsers
273// ---------------------------------------------------------------------------
274
275/// Parse a FileDesc packet body.
276///
277/// Layout (offsets relative to `data`, which starts at recovery_set_id):
278/// ```text
279///  0..16   Recovery Set ID (already extracted)
280/// 16..32   Packet Type (already matched)
281/// 32..48   File ID
282/// 48..64   Full-file MD5 hash
283/// 64..80   First-16K MD5 hash
284/// 80..88   File size (u64 LE)
285/// 88..     Filename (null-terminated, padded to multiple of 4)
286/// ```
287fn parse_file_desc(data: &[u8], state: &mut ParseState) {
288    if data.len() < 88 {
289        warn!("FileDesc packet too short ({} bytes)", data.len());
290        return;
291    }
292
293    let mut file_id = [0u8; 16];
294    file_id.copy_from_slice(&data[32..48]);
295
296    // Skip duplicates
297    if state.file_descs.contains_key(&file_id) {
298        return;
299    }
300
301    let mut hash = [0u8; 16];
302    hash.copy_from_slice(&data[48..64]);
303
304    let mut hash_16k = [0u8; 16];
305    hash_16k.copy_from_slice(&data[64..80]);
306
307    let size = u64::from_le_bytes(data[80..88].try_into().unwrap());
308
309    // Filename: everything after offset 88, strip null padding
310    let name_bytes = &data[88..];
311    let name_end = name_bytes
312        .iter()
313        .position(|&b| b == 0)
314        .unwrap_or(name_bytes.len());
315    let filename = String::from_utf8_lossy(&name_bytes[..name_end]).into_owned();
316
317    trace!(filename, size, "parsed FileDesc");
318
319    state.file_descs.insert(
320        file_id,
321        FileDescData {
322            hash,
323            hash_16k,
324            size,
325            filename,
326        },
327    );
328}
329
330/// Parse an IFSC (Input File Slice Checksum) packet.
331///
332/// Layout:
333/// ```text
334///  0..16   Recovery Set ID
335/// 16..32   Packet Type
336/// 32..48   File ID
337/// 48..     Pairs of (MD5[16] + CRC32[4]) for each slice
338/// ```
339fn parse_ifsc(data: &[u8], packet_len: u64, state: &mut ParseState) {
340    if data.len() < 48 {
341        warn!("IFSC packet too short ({} bytes)", data.len());
342        return;
343    }
344
345    let mut file_id = [0u8; 16];
346    file_id.copy_from_slice(&data[32..48]);
347
348    // Skip duplicates
349    if state.ifsc_data.contains_key(&file_id) {
350        return;
351    }
352
353    let body_len = (packet_len - 64) as usize; // body after 64-byte header
354    let checksum_data = &data[48..];
355    let num_slices = (body_len - 16) / 20; // subtract File ID, 20 bytes per slice
356
357    let mut slices = Vec::with_capacity(num_slices);
358    for i in 0..num_slices {
359        let offset = i * 20;
360        if offset + 20 > checksum_data.len() {
361            break;
362        }
363
364        let mut md5 = [0u8; 16];
365        md5.copy_from_slice(&checksum_data[offset..offset + 16]);
366        let crc32 = u32::from_le_bytes(checksum_data[offset + 16..offset + 20].try_into().unwrap());
367
368        slices.push(SliceChecksum { md5, crc32 });
369    }
370
371    trace!(slices = slices.len(), "parsed IFSC");
372
373    state.ifsc_data.insert(file_id, slices);
374}
375
376/// Parse the Main packet.
377///
378/// Layout:
379/// ```text
380///  0..16   Recovery Set ID
381/// 16..32   Packet Type
382/// 32..40   Slice size (u64 LE)
383/// 40..44   Number of files in recovery set (u32 LE)
384/// 44..     File IDs (16 bytes each)
385/// ```
386fn parse_main(data: &[u8], state: &mut ParseState) {
387    if data.len() < 44 {
388        warn!("Main packet too short ({} bytes)", data.len());
389        return;
390    }
391
392    let slice_size = u64::from_le_bytes(data[32..40].try_into().unwrap());
393    let nr_files = u32::from_le_bytes(data[40..44].try_into().unwrap());
394
395    trace!(slice_size, nr_files, "parsed Main");
396
397    state.slice_size = Some(slice_size);
398    state.nr_files = Some(nr_files);
399}
400
401/// Parse a Creator packet.
402fn parse_creator(data: &[u8], state: &mut ParseState) {
403    if data.len() <= 32 {
404        return;
405    }
406    let creator_bytes = &data[32..];
407    let end = creator_bytes
408        .iter()
409        .position(|&b| b == 0)
410        .unwrap_or(creator_bytes.len());
411    let creator = String::from_utf8_lossy(&creator_bytes[..end]).into_owned();
412    debug!(creator, "PAR2 creator");
413    state.creator = Some(creator);
414}
415
416// ---------------------------------------------------------------------------
417// Helpers
418// ---------------------------------------------------------------------------
419
420/// Scan forward to find the next PAR2_MAGIC occurrence.
421fn scan_for_magic<R: Read + Seek>(reader: &mut R, file_size: u64) -> io::Result<Option<u64>> {
422    let start = reader.stream_position()?;
423    // Read in chunks to find the magic
424    let mut buf = [0u8; 4096];
425    let mut search_pos = start;
426
427    while search_pos < file_size {
428        reader.seek(SeekFrom::Start(search_pos))?;
429        let n = reader.read(&mut buf)?;
430        if n < 8 {
431            return Ok(None);
432        }
433        for i in 0..n.saturating_sub(7) {
434            if &buf[i..i + 8] == PAR2_MAGIC {
435                return Ok(Some(search_pos + i as u64));
436            }
437        }
438        // Overlap by 7 to catch magic spanning chunk boundaries
439        search_pos += (n - 7) as u64;
440    }
441    Ok(None)
442}
443
444// ---------------------------------------------------------------------------
445// Tests
446// ---------------------------------------------------------------------------
447
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the fixture at `location` if it exists on this machine;
    /// otherwise prints a skip notice and returns `None` so the calling test
    /// can bail out without failing.
    fn fixture(location: &'static str) -> Option<&'static Path> {
        let path = Path::new(location);
        if path.exists() {
            Some(path)
        } else {
            eprintln!("Skipping test: {path:?} not found");
            None
        }
    }

    /// The SABnzbd `par2test.par2` index file yields the expected metadata.
    #[test]
    fn test_parse_par2test() {
        let path =
            match fixture("/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.par2") {
                Some(path) => path,
                None => return,
            };

        let set = parse_par2_file(path).unwrap();

        // Six files, 100000-byte (0x186A0) slices, created by QuickPar 0.9.
        assert_eq!(set.files.len(), 6, "expected 6 files in par2 set");
        assert_eq!(set.slice_size, 100000, "expected slice_size = 100000");
        assert_eq!(
            set.creator.as_deref(),
            Some("QuickPar 0.9"),
            "expected creator = QuickPar 0.9"
        );

        // An index file carries no recovery blocks.
        assert_eq!(set.recovery_block_count, 0);

        // Every expected file name must be present.
        for i in 1..=6 {
            let expected = format!("par2test.part{i}.rar");
            assert!(
                set.files.values().any(|f| f.filename == expected),
                "missing file: {expected}"
            );
        }

        // All parts are 102400 bytes except the shorter last one.
        for f in set.files.values() {
            match f.filename.as_str() {
                "par2test.part6.rar" => {
                    assert!(f.size < 100000, "part6 should be smaller than slice_size")
                }
                _ => assert_eq!(f.size, 102400, "{} should be 102400 bytes", f.filename),
            }
        }

        // Every file must come with IFSC slice checksums.
        for f in set.files.values() {
            assert!(
                !f.slices.is_empty(),
                "{} should have slice checksums",
                f.filename
            );
        }
    }

    /// The `basic_16k` par2 file parses into a non-empty set.
    #[test]
    fn test_parse_basic_16k() {
        let path = match fixture("/home/sprooty/sabnzbd/tests/data/par2file/basic_16k.par2") {
            Some(path) => path,
            None => return,
        };

        let set = parse_par2_file(path).unwrap();
        assert!(!set.files.is_empty(), "should parse at least one file");
        assert!(set.slice_size > 0, "slice_size should be > 0");
    }

    /// A file that is not PAR2 at all is rejected with an error.
    #[test]
    fn test_parse_non_par2() {
        let path = match fixture(
            "/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.part2.rar",
        ) {
            Some(path) => path,
            None => return,
        };

        let result = parse_par2_file(path);
        assert!(result.is_err(), "parsing a RAR file should fail");
    }

    /// A recovery volume reports at least one recovery block.
    #[test]
    fn test_parse_recovery_volume() {
        let path = match fixture(
            "/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.vol0+1.par2",
        ) {
            Some(path) => path,
            None => return,
        };

        let set = parse_par2_file(path).unwrap();
        assert!(
            set.recovery_block_count >= 1,
            "recovery volume should have at least 1 recovery block"
        );
    }
}