use std::collections::HashMap;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;
use md5::{Digest, Md5};
use tracing::{debug, trace, warn};
use crate::types::{Id16, Md5Hash, Par2File, Par2FileSet, SliceChecksum};
/// Eight-byte signature that the parser expects at the start of every packet.
const PAR2_MAGIC: &[u8; 8] = b"PAR2\x00PKT";
/// Public alias of the packet signature.
pub const MAGIC: &[u8; 8] = PAR2_MAGIC;
/// NOTE(review): presumably the size of the fixed packet header
/// (8 magic + 8 length + 16 MD5 + 16 set id + 16 type = 64 bytes, matching the
/// offsets the parser reads); not referenced in the visible part of this file.
pub const HEADER_SIZE: usize = 64;
/// Smallest declared packet length the parser accepts.
const MIN_PACKET_LEN: u64 = 64;
// 16-byte packet type tags. The parser compares them against the 16 bytes that
// follow the recovery set id inside each packet.
const TYPE_MAIN: &[u8; 16] = b"PAR 2.0\x00Main\x00\x00\x00\x00";
const TYPE_FILE_DESC: &[u8; 16] = b"PAR 2.0\x00FileDesc";
const TYPE_IFSC: &[u8; 16] = b"PAR 2.0\x00IFSC\x00\x00\x00\x00";
const TYPE_RECOVERY: &[u8; 16] = b"PAR 2.0\x00RecvSlic";
const TYPE_CREATOR: &[u8; 16] = b"PAR 2.0\x00Creator\x00";
/// Errors reported by the PAR2 parsing entry points in this file.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
/// An underlying read, seek or file-open operation failed.
#[error("I/O error: {0}")]
Io(#[from] io::Error),
/// The input was scanned but no packet was counted as parsed.
#[error("no PAR2 packets found in file")]
NoPar2Packets,
/// Packets were found, but none of them supplied the slice size.
#[error("missing Main packet — cannot determine slice size")]
NoMainPacket,
}
/// Mutable accumulator filled in while packets are read one by one.
struct ParseState {
// Set id taken from the first packet that provides one.
recovery_set_id: Option<Id16>,
// Slice size and file count, both taken from the Main packet.
slice_size: Option<u64>,
nr_files: Option<u32>,
// File descriptions keyed by file id; the first packet seen for an id wins.
file_descs: HashMap<Id16, FileDescData>,
// Per-slice checksums keyed by file id; the first packet seen for an id wins.
ifsc_data: HashMap<Id16, Vec<SliceChecksum>>,
// Number of recovery-slice packets encountered.
recovery_count: u32,
// Creator string from the Creator packet, if one was seen.
creator: Option<String>,
}
/// Fields extracted from a single FileDesc packet.
struct FileDescData {
// 16 bytes copied from packet data offsets 48..64.
hash: Md5Hash,
// 16 bytes copied from packet data offsets 64..80.
// NOTE(review): presumably the MD5 of the file's first 16 KiB — confirm against the spec.
hash_16k: Md5Hash,
// Little-endian u64 read from packet data offsets 80..88.
size: u64,
// Bytes after offset 88 up to the first NUL, decoded as lossy UTF-8.
filename: String,
}
/// Opens the file at `path` and parses it as a PAR2 packet stream.
///
/// The length reported by the file's metadata is passed to
/// [`parse_par2_reader`] as the end-of-data bound.
///
/// # Errors
///
/// Returns [`ParseError::Io`] when the file cannot be opened or its metadata
/// cannot be read, and otherwise whatever [`parse_par2_reader`] returns.
pub fn parse_par2_file(path: &Path) -> Result<Par2FileSet, ParseError> {
    let handle = std::fs::File::open(path)?;
    let file_size = handle.metadata()?.len();
    parse_par2_reader(&mut io::BufReader::new(handle), file_size)
}
/// Parses a PAR2 packet stream from `reader` and collects its metadata.
///
/// Packets are read sequentially starting at the reader's current position.
/// `file_size` is treated as the authoritative end of the data: scanning stops
/// once the position reaches it, and a packet whose declared length would run
/// past it is rejected.
///
/// Packets at or below roughly 10 MiB are buffered, MD5-verified and dispatched
/// by type. Larger packets are only skimmed (type tag inspected, body skipped).
/// Whenever the stream is out of sync (bad signature, implausible length), the
/// parser searches forward for the next packet signature, starting right after
/// the byte or signature that failed, so no candidate position is skipped.
///
/// For inputs larger than 10 MiB parsing stops early as soon as the Main packet
/// and all file description / checksum packets it announces have been seen.
///
/// # Errors
///
/// * [`ParseError::Io`] — a seek or header read failed.
/// * [`ParseError::NoPar2Packets`] — no packet was parsed at all.
/// * [`ParseError::NoMainPacket`] — no Main packet supplied the slice size.
pub fn parse_par2_reader<R: Read + Seek>(
    reader: &mut R,
    file_size: u64,
) -> Result<Par2FileSet, ParseError> {
    // Threshold shared by the "skim large packets" and "stop early" heuristics.
    const LARGE_THRESHOLD: u64 = 10 * 1024 * 1024;
    // Length of the packet signature; resync resumes right behind it.
    const MAGIC_LEN: u64 = 8;

    let mut state = ParseState {
        recovery_set_id: None,
        slice_size: None,
        nr_files: None,
        file_descs: HashMap::new(),
        ifsc_data: HashMap::new(),
        recovery_count: 0,
        creator: None,
    };
    let mut magic_buf = [0u8; 8];
    let mut packets_parsed = 0u32;

    loop {
        let pos = reader.stream_position()?;
        if pos >= file_size {
            break;
        }
        if reader.read_exact(&mut magic_buf).is_err() {
            break;
        }
        if magic_buf != *PAR2_MAGIC {
            // Restart the search one byte after `pos`: the signature may begin
            // inside the eight bytes that were just consumed.
            reader.seek(SeekFrom::Start(pos + 1))?;
            if let Some(next_pos) = scan_for_magic(reader, file_size)? {
                reader.seek(SeekFrom::Start(next_pos))?;
                continue;
            }
            break;
        }

        let mut len_buf = [0u8; 8];
        if reader.read_exact(&mut len_buf).is_err() {
            break;
        }
        let packet_len = u64::from_le_bytes(len_buf);
        if packet_len < MIN_PACKET_LEN || packet_len % 4 != 0 {
            warn!(packet_len, pos, "invalid PAR2 packet length, skipping");
            // Only the signature is trustworthy here; resume right behind it so
            // the bogus length field cannot hide a following packet.
            reader.seek(SeekFrom::Start(pos + MAGIC_LEN))?;
            continue;
        }

        // The length comes from untrusted input: make sure the packet really
        // fits before allocating for it or seeking by it.
        let packet_end = match pos.checked_add(packet_len) {
            Some(end) if end <= file_size => end,
            _ => {
                warn!(
                    packet_len,
                    pos, file_size, "PAR2 packet extends past end of file, skipping"
                );
                reader.seek(SeekFrom::Start(pos + MAGIC_LEN))?;
                continue;
            }
        };

        if packet_len - 16 > LARGE_THRESHOLD {
            // Too large to buffer: read MD5 (16) + set id (16) + type (16) and
            // jump over the body without verifying the checksum.
            let mut head = [0u8; 48];
            reader.read_exact(&mut head)?;
            if &head[32..48] == TYPE_RECOVERY {
                state.recovery_count += 1;
                if state.recovery_set_id.is_none() {
                    let mut id = [0u8; 16];
                    id.copy_from_slice(&head[16..32]);
                    state.recovery_set_id = Some(id);
                }
            }
            // Absolute seek: avoids casting an untrusted length to `i64`.
            reader.seek(SeekFrom::Start(packet_end))?;
            packets_parsed += 1;
            continue;
        }

        let mut stored_md5 = [0u8; 16];
        reader.read_exact(&mut stored_md5)?;
        // Bounded by LARGE_THRESHOLD above, so the cast cannot truncate.
        let data_len = (packet_len - 32) as usize;
        let mut data = vec![0u8; data_len];
        if reader.read_exact(&mut data).is_err() {
            break;
        }
        // The stored MD5 covers everything from the set id to the packet end.
        let computed_md5: [u8; 16] = Md5::digest(&data).into();
        if computed_md5 != stored_md5 {
            warn!(pos, "PAR2 packet MD5 mismatch, skipping");
            continue;
        }

        if state.recovery_set_id.is_none() {
            let mut set_id = [0u8; 16];
            set_id.copy_from_slice(&data[..16]);
            state.recovery_set_id = Some(set_id);
        }

        let packet_type = &data[16..32];
        if packet_type == TYPE_FILE_DESC {
            parse_file_desc(&data, &mut state);
        } else if packet_type == TYPE_IFSC {
            parse_ifsc(&data, packet_len, &mut state);
        } else if packet_type == TYPE_MAIN {
            parse_main(&data, &mut state);
        } else if packet_type == TYPE_RECOVERY {
            state.recovery_count += 1;
        } else if packet_type == TYPE_CREATOR {
            parse_creator(&data, &mut state);
        }
        packets_parsed += 1;

        // On big inputs, stop as soon as every announced file has both its
        // description and its checksums and the slice size is known.
        if let Some(nr) = state.nr_files {
            let expected = nr as usize;
            let metadata_complete = state.file_descs.len() == expected
                && state.ifsc_data.len() == expected
                && state.slice_size.is_some();
            if metadata_complete && file_size > LARGE_THRESHOLD {
                debug!(
                    packets_parsed,
                    "parsed all file metadata, stopping early on large file"
                );
                break;
            }
        }
    }

    if packets_parsed == 0 {
        return Err(ParseError::NoPar2Packets);
    }
    let slice_size = state.slice_size.ok_or(ParseError::NoMainPacket)?;
    let recovery_set_id = state.recovery_set_id.unwrap_or([0u8; 16]);

    // Join every file description with its checksum list (empty if none seen).
    let mut files = HashMap::new();
    for (file_id, desc) in state.file_descs {
        let slices = state.ifsc_data.remove(&file_id).unwrap_or_default();
        files.insert(
            file_id,
            Par2File {
                file_id,
                hash: desc.hash,
                hash_16k: desc.hash_16k,
                size: desc.size,
                filename: desc.filename,
                slices,
            },
        );
    }

    debug!(
        files = files.len(),
        recovery_blocks = state.recovery_count,
        slice_size,
        creator = state.creator.as_deref().unwrap_or("unknown"),
        "PAR2 file parsed"
    );

    Ok(Par2FileSet {
        recovery_set_id,
        slice_size,
        files,
        recovery_block_count: state.recovery_count,
        creator: state.creator,
    })
}
/// Records the contents of a FileDesc packet in `state`.
///
/// `data` is the packet starting at the recovery set id. Packets shorter than
/// 88 bytes are logged and ignored, and a file id that is already known is left
/// untouched (the first description wins).
fn parse_file_desc(data: &[u8], state: &mut ParseState) {
    // Offset at which the file name starts; everything before it is fixed-size.
    const NAME_OFFSET: usize = 88;

    if data.len() < NAME_OFFSET {
        warn!("FileDesc packet too short ({} bytes)", data.len());
        return;
    }

    // Copies the 16-byte field that begins at `start`.
    let field16 = |start: usize| -> [u8; 16] {
        let mut out = [0u8; 16];
        out.copy_from_slice(&data[start..start + 16]);
        out
    };

    let file_id = field16(32);
    if state.file_descs.contains_key(&file_id) {
        return;
    }
    let hash = field16(48);
    let hash_16k = field16(64);

    let mut size_le = [0u8; 8];
    size_le.copy_from_slice(&data[80..NAME_OFFSET]);
    let size = u64::from_le_bytes(size_le);

    // The name runs up to the first NUL byte, or to the end of the packet.
    let tail = &data[NAME_OFFSET..];
    let raw_name = match tail.iter().position(|&b| b == 0) {
        Some(nul) => &tail[..nul],
        None => tail,
    };
    let filename = String::from_utf8_lossy(raw_name).into_owned();

    trace!(filename, size, "parsed FileDesc");
    let desc = FileDescData {
        hash,
        hash_16k,
        size,
        filename,
    };
    state.file_descs.insert(file_id, desc);
}
fn parse_ifsc(data: &[u8], packet_len: u64, state: &mut ParseState) {
if data.len() < 48 {
warn!("IFSC packet too short ({} bytes)", data.len());
return;
}
let mut file_id = [0u8; 16];
file_id.copy_from_slice(&data[32..48]);
if state.ifsc_data.contains_key(&file_id) {
return;
}
let body_len = (packet_len - 64) as usize; let checksum_data = &data[48..];
let num_slices = (body_len - 16) / 20;
let mut slices = Vec::with_capacity(num_slices);
for i in 0..num_slices {
let offset = i * 20;
if offset + 20 > checksum_data.len() {
break;
}
let mut md5 = [0u8; 16];
md5.copy_from_slice(&checksum_data[offset..offset + 16]);
let crc32 = u32::from_le_bytes(checksum_data[offset + 16..offset + 20].try_into().unwrap());
slices.push(SliceChecksum { md5, crc32 });
}
trace!(slices = slices.len(), "parsed IFSC");
state.ifsc_data.insert(file_id, slices);
}
/// Extracts slice size and file count from a Main packet into `state`.
///
/// `data` is the packet starting at the recovery set id. The slice size is the
/// little-endian `u64` at offset 32 and the file count the little-endian `u32`
/// at offset 40. Packets shorter than 44 bytes are logged and ignored.
fn parse_main(data: &[u8], state: &mut ParseState) {
    // The two fixed fields end at this offset.
    const FIXED_END: usize = 44;

    if data.len() < FIXED_END {
        warn!("Main packet too short ({} bytes)", data.len());
        return;
    }

    let mut slice_size_le = [0u8; 8];
    slice_size_le.copy_from_slice(&data[32..40]);
    let mut nr_files_le = [0u8; 4];
    nr_files_le.copy_from_slice(&data[40..FIXED_END]);

    let slice_size = u64::from_le_bytes(slice_size_le);
    let nr_files = u32::from_le_bytes(nr_files_le);
    trace!(slice_size, nr_files, "parsed Main");

    state.nr_files = Some(nr_files);
    state.slice_size = Some(slice_size);
}
/// Stores the creator string of a Creator packet in `state`.
///
/// `data` is the packet starting at the recovery set id. The text starts at
/// offset 32 and ends at the first NUL byte (or the end of the packet); it is
/// decoded as lossy UTF-8. Packets with no bytes past offset 32 are ignored.
fn parse_creator(data: &[u8], state: &mut ParseState) {
    let payload = match data.get(32..) {
        Some(bytes) if !bytes.is_empty() => bytes,
        _ => return,
    };
    // `split` always yields at least one piece: the bytes before the first NUL.
    let text = payload.split(|&b| b == 0).next().unwrap_or(payload);
    let creator = String::from_utf8_lossy(text).into_owned();
    debug!(creator, "PAR2 creator");
    state.creator = Some(creator);
}
/// Searches forward from the reader's current position for the packet signature.
///
/// The stream is examined in 4 KiB chunks that overlap by seven bytes, so a
/// signature straddling a chunk boundary is still found. Each chunk is filled
/// until the buffer is full or the reader reports end of data: a short count
/// from a single `read` call is not mistaken for EOF, and interrupted reads are
/// retried.
///
/// Returns the absolute offset of the first signature found, or `None` when
/// fewer than eight bytes remain or `file_size` is reached without a match. The
/// reader's position afterwards is unspecified; callers are expected to seek.
///
/// # Errors
///
/// Propagates any seek or read error other than `Interrupted`.
fn scan_for_magic<R: Read + Seek>(reader: &mut R, file_size: u64) -> io::Result<Option<u64>> {
    const MAGIC_LEN: usize = 8;

    let mut buf = [0u8; 4096];
    let mut search_pos = reader.stream_position()?;

    while search_pos < file_size {
        reader.seek(SeekFrom::Start(search_pos))?;

        // Fill the chunk; only a zero-length read means end of data.
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        if filled < MAGIC_LEN {
            return Ok(None);
        }

        let hit = buf[..filled]
            .windows(MAGIC_LEN)
            .position(|window| window == PAR2_MAGIC);
        if let Some(offset) = hit {
            return Ok(Some(search_pos + offset as u64));
        }

        // Keep the last seven bytes in view for the next chunk.
        search_pos += (filled - (MAGIC_LEN - 1)) as u64;
    }
    Ok(None)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the fixture at `raw` when it exists on this machine.
    ///
    /// When it does not, a skip notice is printed and `None` is returned so the
    /// calling test can return early (and therefore pass without checking anything).
    fn fixture(raw: &str) -> Option<&Path> {
        let path = Path::new(raw);
        if path.exists() {
            return Some(path);
        }
        eprintln!("Skipping test: {path:?} not found");
        None
    }

    /// Full metadata check against the QuickPar-generated index file.
    #[test]
    fn test_parse_par2test() {
        let path = match fixture("/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.par2")
        {
            Some(found) => found,
            None => return,
        };
        let set = parse_par2_file(path).unwrap();

        assert_eq!(set.files.len(), 6, "expected 6 files in par2 set");
        assert_eq!(set.slice_size, 100000, "expected slice_size = 100000");
        assert_eq!(
            set.creator.as_deref(),
            Some("QuickPar 0.9"),
            "expected creator = QuickPar 0.9"
        );
        assert_eq!(set.recovery_block_count, 0);

        // Every expected part name must be present.
        let filenames: Vec<&str> = set
            .files
            .values()
            .map(|entry| entry.filename.as_str())
            .collect();
        for i in 1..=6 {
            let expected = format!("par2test.part{i}.rar");
            assert!(
                filenames.contains(&expected.as_str()),
                "missing file: {expected}"
            );
        }

        // Only the last part is allowed to be short.
        for f in set.files.values() {
            match f.filename.as_str() {
                "par2test.part6.rar" => {
                    assert!(f.size < 100000, "part6 should be smaller than slice_size")
                }
                _ => assert_eq!(f.size, 102400, "{} should be 102400 bytes", f.filename),
            }
        }

        for f in set.files.values() {
            assert!(
                !f.slices.is_empty(),
                "{} should have slice checksums",
                f.filename
            );
        }
    }

    /// Smoke test: a second index file yields at least one file and a slice size.
    #[test]
    fn test_parse_basic_16k() {
        let path = match fixture("/home/sprooty/sabnzbd/tests/data/par2file/basic_16k.par2") {
            Some(found) => found,
            None => return,
        };
        let set = parse_par2_file(path).unwrap();
        assert!(!set.files.is_empty(), "should parse at least one file");
        assert!(set.slice_size > 0, "slice_size should be > 0");
    }

    /// A file that is not a PAR2 file must be rejected.
    #[test]
    fn test_parse_non_par2() {
        let path = match fixture(
            "/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.part2.rar",
        ) {
            Some(found) => found,
            None => return,
        };
        let result = parse_par2_file(path);
        assert!(result.is_err(), "parsing a RAR file should fail");
    }

    /// A recovery volume must report at least one recovery block.
    #[test]
    fn test_parse_recovery_volume() {
        let path = match fixture(
            "/home/sprooty/sabnzbd/tests/data/par2repair/basic/par2test.vol0+1.par2",
        ) {
            Some(found) => found,
            None => return,
        };
        let set = parse_par2_file(path).unwrap();
        assert!(
            set.recovery_block_count >= 1,
            "recovery volume should have at least 1 recovery block"
        );
    }
}