use std::fs::File;
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use thiserror::Error;
mod bodyfile;
mod findings;
pub use findings::{Anomaly, AnomalyKind, Severity};
/// Four-byte magic number that `DarReader::open` and `slice_header_len` expect at the
/// very start of a slice header.
const DAR_MAGIC: [u8; 4] = [0x00, 0x00, 0x00, 0x7b];
/// Upper bound on how many compressed catalogue bytes are read into memory.
const MAX_CATALOGUE_COMPRESSED: u64 = 512 * 1024 * 1024;
/// Upper bound on the inflated catalogue; decompression fails once it would be exceeded.
const MAX_CATALOGUE_INFLATED: u64 = 1024 * 1024 * 1024;
/// Largest CRC width (in bytes) accepted by `read_crc`.
const MAX_CRC_SIZE: u64 = 64 * 1024;
/// Largest compression block size honoured; larger header values are treated as "none".
const MAX_BLOCK_SIZE: u64 = 256 * 1024 * 1024;
/// Timestamps strictly greater than this many epoch seconds are reported by `audit`
/// (presumably 2100-01-01T00:00:00Z — verify).
const FAR_FUTURE_EPOCH_SECS: i64 = 4_102_444_800;
/// Byte sequence searched for by `scan_window` to locate the catalogue.
const SEQT_CATALOGUE: [u8; 6] = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43];
/// Packed format value (`major * 256 + fix`) for 11.1; from this value onwards an extra
/// NUL-terminated string is skipped before the catalogue is parsed.
const FORMAT_11_1: u32 = 11 * 256 + 1;
/// Errors returned while opening, parsing, or extracting from an archive.
#[derive(Debug, Error)]
pub enum DarError {
/// An underlying read or seek failed; also produced when the input ends prematurely.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// The input does not start with the expected magic number (or is too short to hold it).
#[error("not a DAR archive")]
NotADar,
/// The archive structure is inconsistent; the message describes what was wrong.
#[error("corrupt archive: {0}")]
Corrupt(String),
/// No catalogue entry has the requested path (shown lossily decoded).
#[error("entry not found: '{0}'")]
EntryNotFound(String),
}
/// Outcome of comparing an entry's stored CRC with the one computed from its data.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum CrcStatus {
    /// Stored and computed checksums are identical.
    Match,
    /// The checksums differ; both are given as lowercase hex strings.
    Mismatch {
        /// Checksum recorded in the catalogue.
        stored: String,
        /// Checksum computed from the extracted data.
        computed: String,
    },
    /// The catalogue holds no checksum for the entry.
    NotStored,
}

impl std::fmt::Display for CrcStatus {
    /// Renders a short, human-readable description of the status.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The two data-less variants map to fixed strings; only a mismatch needs formatting.
        let fixed = match self {
            Self::Match => "CRC match",
            Self::NotStored => "no CRC stored",
            Self::Mismatch { stored, computed } => {
                return write!(f, "CRC mismatch: stored {stored}, computed {computed}");
            }
        };
        f.write_str(fixed)
    }
}
/// Type of a catalogue entry.
///
/// NOTE(review): the catalogue parser visible in this file only produces `File`,
/// `Directory`, `Symlink`, `NamedPipe` and `Socket`; confirm where the remaining
/// variants originate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub enum EntryKind {
/// Regular file with stored data.
File,
/// Directory.
Directory,
/// Symbolic link; the target is kept in `DarEntry::symlink_target`.
Symlink,
/// Named pipe (FIFO).
NamedPipe,
/// Socket.
Socket,
/// Character device.
CharDevice,
/// Block device.
BlockDevice,
/// Hard link.
Hardlink,
/// Entry type not otherwise recognised, carrying a type character.
Unknown(char),
}
/// Public, owned view of one catalogue entry.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct DarEntry {
/// Path as raw bytes (components joined with `/`); serialized lossily as UTF-8.
#[cfg_attr(feature = "serde", serde(serialize_with = "serialize_bytes_lossy"))]
pub path: Vec<u8>,
/// Entry type.
pub kind: EntryKind,
/// Size declared by the catalogue; 0 for non-file entries.
pub size: u64,
/// Owner user id as recorded in the inode.
pub uid: u64,
/// Owner group id as recorded in the inode.
pub gid: u64,
/// 16-bit mode field as recorded in the inode.
pub mode: u16,
/// Access time, whole seconds.
pub atime: i64,
/// Modification time, whole seconds.
pub mtime: i64,
/// Change time, whole seconds; only read for format major versions >= 8.
pub ctime: Option<i64>,
/// Link target for symlinks, raw bytes; serialized lossily as UTF-8.
#[cfg_attr(feature = "serde", serde(serialize_with = "serialize_opt_bytes_lossy"))]
pub symlink_target: Option<Vec<u8>>,
}
impl DarEntry {
    /// Returns the path decoded as UTF-8, with invalid sequences replaced by U+FFFD.
    ///
    /// Borrows when the path is already valid UTF-8.
    #[must_use]
    pub fn path_lossy(&self) -> std::borrow::Cow<'_, str> {
        let raw: &[u8] = self.path.as_slice();
        String::from_utf8_lossy(raw)
    }

    /// Formats this entry as a single bodyfile line (delegates to the `bodyfile` module).
    #[must_use]
    pub fn bodyfile(&self) -> String {
        let entry: &Self = self;
        bodyfile::line(entry)
    }
}
/// Serde helper: serializes raw bytes as a string, replacing invalid UTF-8 lossily.
#[cfg(feature = "serde")]
fn serialize_bytes_lossy<S: serde::Serializer>(bytes: &[u8], s: S) -> Result<S::Ok, S::Error> {
    let text = String::from_utf8_lossy(bytes);
    s.serialize_str(&text)
}
/// Serde helper: serializes optional raw bytes as an optional string, replacing invalid
/// UTF-8 lossily.
#[cfg(feature = "serde")]
#[allow(clippy::ref_option)] // serde's `serialize_with` hands over `&Option<_>`
fn serialize_opt_bytes_lossy<S: serde::Serializer>(
    target: &Option<Vec<u8>>,
    s: S,
) -> Result<S::Ok, S::Error> {
    let Some(bytes) = target else {
        return s.serialize_none();
    };
    let text = String::from_utf8_lossy(bytes);
    s.serialize_some(&text)
}
/// Internal catalogue record: the public `DarEntry` metadata plus what is needed to
/// locate, decode and verify the entry's data.
#[derive(Debug, Clone)]
struct EntryRef {
// Metadata mirrored into `DarEntry` by `to_dar_entry`.
path: Vec<u8>,
kind: EntryKind,
size: u64,
uid: u64,
gid: u64,
mode: u16,
atime: i64,
mtime: i64,
ctime: Option<i64>,
symlink_target: Option<Vec<u8>>,
// Data offset; added to the reader's `archive_origin` to get the absolute position.
archive_offset: u64,
// Number of bytes stored in the archive for this entry.
stored_size: u64,
// Compression algorithm byte (checked with `is_compressed`).
compression: u8,
// Stored checksum, if the catalogue carries one.
crc: Option<Vec<u8>>,
}
impl EntryRef {
    /// Builds the public `DarEntry` for this record, dropping the extraction-only fields.
    fn to_dar_entry(&self) -> DarEntry {
        let Self {
            path,
            kind,
            size,
            uid,
            gid,
            mode,
            atime,
            mtime,
            ctime,
            symlink_target,
            ..
        } = self;
        DarEntry {
            path: path.clone(),
            kind: *kind,
            size: *size,
            uid: *uid,
            gid: *gid,
            mode: *mode,
            atime: *atime,
            mtime: *mtime,
            ctime: *ctime,
            symlink_target: symlink_target.clone(),
        }
    }
}
/// Reader over a DAR archive whose catalogue has been parsed by `open`.
pub struct DarReader<R: Read + Seek> {
// Underlying archive stream.
inner: R,
// Stream position right after the slice header; entry offsets are relative to it.
archive_origin: u64,
// Major format version taken from the archive header.
format_major: u32,
// Whether the catalogue parse reached the end of the root directory.
complete: bool,
// Compression block size from the header; 0 when absent or not applicable.
compr_bs: u64,
// Parsed catalogue records.
entries: Vec<EntryRef>,
}
impl<R: Read + Seek> DarReader<R> {
/// Opens an archive: validates the slice header, locates the catalogue and parses it.
///
/// Two header layouts are handled, selected by the extension byte:
/// * `T` — a TLV list follows; the catalogue is located by scanning the body
///   (see `find_catalogue`).
/// * `N` / `S` — the catalogue offset is read from the terminator at the end of the
///   stream (see `read_terminateur`).
///
/// # Errors
/// * `DarError::NotADar` if the magic number is missing or cannot be read.
/// * `DarError::Corrupt` for an unknown extension byte, a truncated TLV count, a
///   catalogue offset that overflows or lies past the end, or any parse failure.
/// * `DarError::Io` for other read/seek failures.
#[allow(clippy::too_many_lines)]
pub fn open(mut reader: R) -> Result<Self, DarError> {
let mut magic = [0u8; 4];
// Any failure to read the magic (including a too-short input) is reported as NotADar.
reader
.read_exact(&mut magic)
.map_err(|_| DarError::NotADar)?;
if magic != DAR_MAGIC {
return Err(DarError::NotADar);
}
// Fixed part of the header: 10-byte label, one flag byte (unused), one extension byte.
let mut label = [0u8; 10];
reader.read_exact(&mut label)?; let _flag = read_u8(&mut reader)?; let extension = read_u8(&mut reader)?;
let entries;
let archive_origin;
let format_major;
let complete;
let compr_bs;
if extension == b'T' {
// An I/O failure while reading the TLV count is reported as corruption.
let tlv_count = read_infinint(&mut reader).map_err(|e| match e {
DarError::Io(_) => DarError::Corrupt("truncated TLV block".into()),
other => other,
})?;
let mut data_name: Option<[u8; 10]> = None;
for _ in 0..tlv_count {
let mut typ = [0u8; 2];
reader.read_exact(&mut typ)?;
let len = read_infinint(&mut reader)?;
// Type 0x0003 with a 10-byte payload is kept as the data name; everything else is skipped.
if typ == [0x00, 0x03] && len == 10 {
let mut dn = [0u8; 10];
reader.read_exact(&mut dn)?;
data_name = Some(dn);
} else {
skip(&mut reader, len)?;
}
}
// Entry data offsets are relative to the first byte after the slice header.
archive_origin = reader.stream_position()?;
// Best-effort header read: a missing compression byte defaults to b'n'.
let format_value = read_format_value(&mut reader);
let global_comp = read_u8(&mut reader).unwrap_or(b'n');
compr_bs = read_compr_bs(&mut reader, format_value >> 8);
// Rewind so the catalogue search covers the whole body; the data name is preferred
// over the slice label as the fallback marker.
reader.seek(SeekFrom::Start(archive_origin))?;
let via_escape = find_catalogue(&mut reader, data_name.as_ref().unwrap_or(&label))?;
format_major = format_value >> 8;
if via_escape && is_compressed(global_comp) {
// Compressed catalogue: read a bounded amount, inflate, then parse from memory.
let mut compressed = Vec::new();
reader
.by_ref()
.take(MAX_CATALOGUE_COMPRESSED)
.read_to_end(&mut compressed)?;
let inflated = inflate_catalogue(&compressed, global_comp, compr_bs)?;
let mut cur = Cursor::new(inflated);
// Skip 10 bytes ahead of the entries (presumably the catalogue's data name — verify);
// formats >= 11.1 carry one more NUL-terminated string.
skip(&mut cur, 10)?; if format_value >= FORMAT_11_1 {
skip_nul_string(&mut cur)?;
}
(entries, complete) = parse_catalog(&mut cur, format_major, global_comp)?;
} else {
// Uncompressed catalogue (or found via the label): parse straight from the stream.
// The 10-byte skip only applies when the escape sequence was matched.
if via_escape {
skip(&mut reader, 10)?; }
if format_value >= FORMAT_11_1 {
skip_nul_string(&mut reader)?;
}
(entries, complete) = parse_catalog(&mut reader, format_major, global_comp)?;
}
} else if extension == b'N' || extension == b'S' {
compr_bs = 0;
// The 'S' extension carries one extra infinint, which is read and discarded.
if extension == b'S' {
read_infinint(&mut reader)?; }
archive_origin = reader.stream_position()?;
let format_value = read_format_value(&mut reader); format_major = format_value >> 8;
let global_comp = read_u8(&mut reader).unwrap_or(b'n');
// The catalogue offset comes from the terminator and is relative to archive_origin.
let cat_offset = read_terminateur(&mut reader)?;
let cat_start = archive_origin
.checked_add(cat_offset)
.ok_or_else(|| DarError::Corrupt("catalogue offset overflows".into()))?;
let end = reader.seek(SeekFrom::End(0))?;
if cat_start >= end {
return Err(DarError::Corrupt(format!(
"catalogue start {cat_start} past archive end {end}"
)));
}
reader.seek(SeekFrom::Start(cat_start))?;
if is_compressed(global_comp) {
let mut compressed = Vec::new();
reader
.by_ref()
.take(MAX_CATALOGUE_COMPRESSED)
.read_to_end(&mut compressed)?;
let inflated = inflate_catalogue(&compressed, global_comp, compr_bs)?;
(entries, complete) =
parse_catalog(&mut Cursor::new(inflated), format_major, global_comp)?;
} else {
(entries, complete) = parse_catalog(&mut reader, format_major, global_comp)?;
}
} else {
return Err(DarError::Corrupt(format!(
"unknown slice-header extension {extension:#04x}"
)));
}
Ok(Self {
inner: reader,
archive_origin,
format_major,
complete,
compr_bs,
entries,
})
}
/// Returns the number of catalogue entries that were parsed.
#[must_use]
pub fn entry_count(&self) -> usize {
self.entries.len()
}
pub fn iter_entries(&self) -> impl Iterator<Item = DarEntry> + '_ {
self.entries.iter().map(EntryRef::to_dar_entry)
}
/// Collects every catalogue entry into a `Vec`, in catalogue order.
pub fn entries(&self) -> Vec<DarEntry> {
    let mut all = Vec::with_capacity(self.entries.len());
    all.extend(self.iter_entries());
    all
}
/// Returns `true` when catalogue parsing reached the end-of-directory marker that
/// closes the top-level directory, i.e. the catalogue was not cut short.
#[must_use]
pub fn is_complete(&self) -> bool {
self.complete
}
/// Scans the parsed catalogue for suspicious properties and returns the findings,
/// ordered by descending severity (stable within equal severity).
///
/// Checks performed:
/// * the catalogue was not parsed to completion;
/// * a path starts with `/`;
/// * a path contains a `..` component;
/// * a path contains a byte below 0x20 or equal to 0x7f;
/// * `atime`, `mtime` or `ctime` exceeds `FAR_FUTURE_EPOCH_SECS`;
/// * a path occurs more than once (reported once per distinct path).
#[must_use]
pub fn audit(&self) -> Vec<Anomaly> {
    let mut anomalies = Vec::new();
    if !self.complete {
        anomalies.push(Anomaly::new(AnomalyKind::IncompleteCatalog {
            entries_recovered: self.entries.len(),
        }));
    }
    let mut seen: std::collections::HashSet<&[u8]> = std::collections::HashSet::new();
    let mut dup_seen: std::collections::HashSet<&[u8]> = std::collections::HashSet::new();
    for e in &self.entries {
        let raw = e.path.as_slice();
        // Decode the path only when a finding is actually reported, so clean entries
        // cost no allocation.
        let lossy = || String::from_utf8_lossy(raw).into_owned();

        if raw.first() == Some(&b'/') {
            anomalies.push(Anomaly::new(AnomalyKind::AbsolutePath { path: lossy() }));
        }
        if raw.split(|&b| b == b'/').any(|c| c == b"..") {
            anomalies.push(Anomaly::new(AnomalyKind::ParentTraversal { path: lossy() }));
        }
        if raw.iter().any(|&b| b < 0x20 || b == 0x7f) {
            anomalies.push(Anomaly::new(AnomalyKind::ControlCharsInName {
                path: lossy(),
            }));
        }
        let stamps = [("atime", e.atime), ("mtime", e.mtime)]
            .into_iter()
            .chain(e.ctime.map(|c| ("ctime", c)));
        for (field, t) in stamps {
            if t > FAR_FUTURE_EPOCH_SECS {
                anomalies.push(Anomaly::new(AnomalyKind::FutureTimestamp {
                    path: lossy(),
                    field,
                    epoch_secs: t,
                }));
            }
        }
        // `seen` detects the repeat; `dup_seen` makes sure each duplicated path is
        // reported only once no matter how often it recurs.
        if !seen.insert(raw) && dup_seen.insert(raw) {
            anomalies.push(Anomaly::new(AnomalyKind::DuplicatePath { path: lossy() }));
        }
    }
    anomalies.sort_by_key(|a| std::cmp::Reverse(a.severity));
    anomalies
}
pub fn write_bodyfile<W: Write>(&self, out: &mut W) -> std::io::Result<()> {
for entry in self.iter_entries() {
writeln!(out, "{}", entry.bodyfile())?;
}
Ok(())
}
/// Extracts the entry at `path` and compares its checksum with the stored one.
///
/// Returns `CrcStatus::NotStored` without extracting when the catalogue holds no
/// checksum for the entry.
///
/// # Errors
/// `DarError::EntryNotFound` if no entry has this exact path; otherwise any error
/// from `extract`.
pub fn verify<P: AsRef<[u8]>>(&mut self, path: P) -> Result<CrcStatus, DarError> {
    let wanted = path.as_ref();
    let Some(record) = self.entries.iter().find(|e| e.path.as_slice() == wanted) else {
        return Err(DarError::EntryNotFound(
            String::from_utf8_lossy(wanted).into_owned(),
        ));
    };
    // Cloned because extraction below needs `&mut self`.
    let Some(expected) = record.crc.clone() else {
        return Ok(CrcStatus::NotStored);
    };
    let contents = self.extract(wanted)?;
    let actual = dar_crc(&contents, expected.len());
    if actual != expected {
        return Ok(CrcStatus::Mismatch {
            stored: to_hex(&expected),
            computed: to_hex(&actual),
        });
    }
    Ok(CrcStatus::Match)
}
/// Extracts the data of the entry at `path` into `out` and returns the number of
/// bytes written.
///
/// Uncompressed entries are copied verbatim (`stored_size` bytes). Compressed entries
/// are decoded through a writer capped at the declared size, and the decoded length
/// must equal that size exactly.
///
/// # Errors
/// * `DarError::EntryNotFound` if no entry has this exact path.
/// * `DarError::Corrupt` if the data range lies outside the archive, the stored size
///   cannot be addressed on this platform, decoding fails, or the decoded length
///   differs from the declared size.
/// * `DarError::Io` for read/seek/write failures.
pub fn extract_to<P: AsRef<[u8]>, W: Write>(
    &mut self,
    path: P,
    out: &mut W,
) -> Result<u64, DarError> {
    let path = path.as_ref();
    let name = String::from_utf8_lossy(path);
    // Only four scalar fields are needed; copying them avoids cloning the entry's
    // path, CRC and symlink target, and releases the borrow on `self.entries`.
    let Some((size, archive_offset, stored_size, compression)) = self
        .entries
        .iter()
        .find(|e| e.path.as_slice() == path)
        .map(|e| (e.size, e.archive_offset, e.stored_size, e.compression))
    else {
        return Err(DarError::EntryNotFound(name.into_owned()));
    };

    let start = self
        .archive_origin
        .checked_add(archive_offset)
        .ok_or_else(|| {
            DarError::Corrupt(format!("'{name}' archive offset overflows file position"))
        })?;
    let end = self.inner.seek(SeekFrom::End(0))?;
    if start > end {
        return Err(DarError::Corrupt(format!(
            "'{name}' starts at {start}, past archive end {end}"
        )));
    }
    let available = end - start;
    self.inner.seek(SeekFrom::Start(start))?;

    let compressed = is_compressed(compression);
    // Format 1 compressed data is decoded as an open-ended stream, so its stored size
    // is not consulted; every other path must fit inside the archive.
    let streamed = compressed && self.format_major == 1;
    if !streamed && stored_size > available {
        return Err(DarError::Corrupt(format!(
            "'{name}' claims {stored_size} stored bytes but only {available} remain"
        )));
    }

    if !compressed {
        return Ok(std::io::copy(
            &mut self.inner.by_ref().take(stored_size),
            out,
        )?);
    }

    // Cap the output at the declared size so a hostile stream cannot expand unboundedly.
    let mut cap = CapWriter {
        inner: out,
        written: 0,
        max: size,
    };
    if streamed {
        decode_stream(self.inner.by_ref(), compression, &mut cap)?;
    } else {
        // A checked conversion: a bare cast would silently truncate on 32-bit targets.
        let len = usize::try_from(stored_size).map_err(|_| {
            DarError::Corrupt(format!(
                "'{name}' stored size {stored_size} exceeds addressable memory"
            ))
        })?;
        let mut data = vec![0u8; len];
        self.inner.read_exact(&mut data)?;
        decode_data(&data, compression, self.compr_bs, &mut cap)?;
    }
    if cap.written != size {
        return Err(DarError::Corrupt(format!(
            "'{name}' decompressed to {} bytes but catalog declares {}",
            cap.written, size
        )));
    }
    Ok(cap.written)
}
/// Extracts the data of the entry at `path` into a freshly allocated buffer.
///
/// # Errors
/// Same as `extract_to`.
pub fn extract<P: AsRef<[u8]>>(&mut self, path: P) -> Result<Vec<u8>, DarError> {
    let mut contents = Vec::new();
    self.extract_to(path, &mut contents).map(|_| contents)
}
}
/// Number of bytes at the end of the archive scanned first when looking for the catalogue.
const TAIL_SCAN: u64 = 256 * 1024 * 1024;
/// Number of bytes read per iteration while scanning.
const CHUNK: usize = 4 * 1024 * 1024;
/// Bytes carried over between chunks so a marker straddling a chunk boundary is still
/// found; 9 is one less than the longest marker (the 10-byte label).
const OVERLAP: usize = 9;
/// Scans forward from the current position to end of stream for a catalogue marker.
///
/// Returns `Some(true)` when `SEQT_CATALOGUE` is found, `Some(false)` when (with
/// `use_label` set) the 10-byte `label` is found, and `None` when the stream ends
/// without a match. On a match the reader is left positioned just past the marker.
/// Within one buffer the escape sequence takes precedence over the label, even if the
/// label occurs earlier.
fn scan_window<R: Read + Seek>(
r: &mut R,
label: &[u8; 10],
use_label: bool,
) -> Result<Option<bool>, DarError> {
// Room for one chunk plus the bytes carried over from the previous chunk.
let mut buf = vec![0u8; CHUNK + OVERLAP];
let mut overlap_len: usize = 0;
loop {
let chunk_file_pos = r.stream_position()?;
// New data lands after the carried-over bytes.
let n = r.read(&mut buf[overlap_len..overlap_len + CHUNK])?;
if n == 0 {
break;
}
let total = overlap_len + n;
// Stream offset that corresponds to buf[0].
let buf_base = chunk_file_pos - overlap_len as u64;
if let Some(i) = buf[..total]
.windows(SEQT_CATALOGUE.len())
.position(|w| w == SEQT_CATALOGUE)
{
r.seek(SeekFrom::Start(
buf_base + i as u64 + SEQT_CATALOGUE.len() as u64,
))?;
return Ok(Some(true));
}
if use_label {
if let Some(i) = buf[..total]
.windows(label.len())
.position(|w| w == label.as_ref())
{
r.seek(SeekFrom::Start(buf_base + i as u64 + label.len() as u64))?;
return Ok(Some(false));
}
}
// Keep the buffer tail so a marker split across two reads is still matched.
let keep = OVERLAP.min(total);
buf.copy_within(total - keep..total, 0);
overlap_len = keep;
}
Ok(None)
}
/// Locates the catalogue using the default tail window (`TAIL_SCAN`).
///
/// See `find_catalogue_within` for the return value and errors.
fn find_catalogue<R: Read + Seek>(r: &mut R, label: &[u8; 10]) -> Result<bool, DarError> {
find_catalogue_within(r, label, TAIL_SCAN)
}
/// Locates the catalogue between the current position and the end of the stream.
///
/// The last `tail_scan` bytes are scanned first; if nothing is found there and the
/// tail did not already cover the whole body, the scan restarts from the body start.
/// An all-zero `label` disables the label fallback.
///
/// Returns `true` when the escape sequence matched and `false` when the label matched;
/// the reader is left just past the matched marker.
///
/// # Errors
/// `DarError::Corrupt` if the body is empty or no marker is found.
fn find_catalogue_within<R: Read + Seek>(
    r: &mut R,
    label: &[u8; 10],
    tail_scan: u64,
) -> Result<bool, DarError> {
    let use_label = label.iter().any(|&b| b != 0);
    let body_start = r.stream_position()?;
    let body_end = r.seek(SeekFrom::End(0))?;
    if body_end <= body_start {
        return Err(DarError::Corrupt("archive body too short".into()));
    }
    let tail_start = body_end.saturating_sub(tail_scan).max(body_start);
    // A second pass over the whole body is only needed when the tail was a strict suffix.
    let full_pass = (tail_start > body_start).then_some(body_start);
    for scan_from in std::iter::once(tail_start).chain(full_pass) {
        r.seek(SeekFrom::Start(scan_from))?;
        if let Some(via_escape) = scan_window(r, label, use_label)? {
            return Ok(via_escape);
        }
    }
    Err(DarError::Corrupt("seqt_catalogue not found".into()))
}
/// Parses a slice header starting at the current position and returns the stream
/// position immediately after it.
///
/// # Errors
/// `DarError::NotADar` if the magic number is missing or unreadable;
/// `DarError::Corrupt` for an unknown extension byte; `DarError::Io` otherwise.
fn slice_header_len<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
    let mut magic = [0u8; 4];
    if r.read_exact(&mut magic).is_err() || magic != DAR_MAGIC {
        return Err(DarError::NotADar);
    }
    // 10-byte label, then the flag byte (unused), then the extension byte.
    skip(r, 10)?;
    let _flag = read_u8(r)?;
    let extension = read_u8(r)?;
    match extension {
        b'N' => {}
        b'S' => {
            read_infinint(r)?;
        }
        b'T' => {
            // Each TLV is a 2-byte type, an infinint length, and that many payload bytes.
            let mut remaining = read_infinint(r)?;
            while remaining > 0 {
                skip(r, 2)?;
                let payload_len = read_infinint(r)?;
                skip(r, payload_len)?;
                remaining -= 1;
            }
        }
        other => {
            return Err(DarError::Corrupt(format!(
                "unknown slice-header extension {other:#04x}"
            )));
        }
    }
    r.stream_position().map_err(DarError::Io)
}
/// One slice file and the range of the logical (concatenated) stream it provides.
struct SliceSpan {
// Open handle on the slice file.
file: File,
// Offset inside the file where this slice's contribution starts.
file_data_start: u64,
// Offset of this slice's first byte in the logical stream.
logical_start: u64,
// Number of logical bytes this slice contributes.
logical_len: u64,
}
/// `Read + Seek` view that presents several slice files as one contiguous stream.
pub struct SliceReader {
// Slices in logical order.
slices: Vec<SliceSpan>,
// Current position in the logical stream.
pos: u64,
// Total length of the logical stream.
total: u64,
}
impl SliceReader {
    /// Opens the given slice files, in order, as one logical stream.
    ///
    /// The first slice contributes from offset 0 (its header stays in the stream);
    /// later slices contribute only what follows their slice header. Every slice
    /// except the last has its final byte excluded.
    ///
    /// # Errors
    /// `DarError::Corrupt` if `paths` is empty, a slice is shorter than its header plus
    /// trailing byte, or the total length overflows; `DarError::NotADar` / `Io` from
    /// opening and parsing the files.
    pub fn open(paths: &[PathBuf]) -> Result<Self, DarError> {
        let Some(last_index) = paths.len().checked_sub(1) else {
            return Err(DarError::Corrupt("no slices provided".into()));
        };
        let mut slices = Vec::with_capacity(paths.len());
        let mut next_start = 0u64;
        for (index, path) in paths.iter().enumerate() {
            let mut file = File::open(path)?;
            let len = file.seek(SeekFrom::End(0))?;
            file.seek(SeekFrom::Start(0))?;
            let file_data_start = match index {
                0 => 0,
                _ => slice_header_len(&mut file)?,
            };
            // Non-final slices end with one byte that is not part of the logical stream.
            let trailer: u64 = if index < last_index { 1 } else { 0 };
            let overhead = file_data_start + trailer;
            if len < overhead {
                return Err(DarError::Corrupt(
                    "slice smaller than its header + flag".into(),
                ));
            }
            let logical_len = len - overhead;
            slices.push(SliceSpan {
                file,
                file_data_start,
                logical_start: next_start,
                logical_len,
            });
            next_start = next_start
                .checked_add(logical_len)
                .ok_or_else(|| DarError::Corrupt("total slice length overflows".into()))?;
        }
        Ok(Self {
            slices,
            pos: 0,
            total: next_start,
        })
    }
}
impl Read for SliceReader {
    /// Reads from the logical stream, crossing slice boundaries as needed.
    ///
    /// Fills `buf` as far as possible; returns fewer bytes only at the end of the
    /// logical stream or when a slice file yields no more data than expected.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut written = 0;
        while written < buf.len() {
            let pos = self.pos;
            // First slice whose logical range extends past the current position.
            let Some(span) = self
                .slices
                .iter_mut()
                .find(|s| pos < s.logical_start + s.logical_len)
            else {
                break;
            };
            let within = pos - span.logical_start;
            let room = buf.len() - written;
            let left_in_slice = span.logical_len - within;
            // Clamp in u64 space first: a bare `as usize` would truncate on 32-bit
            // targets and could turn into a zero-length read (a false EOF).
            let want = usize::try_from(left_in_slice).map_or(room, |left| left.min(room));
            span.file
                .seek(SeekFrom::Start(span.file_data_start + within))?;
            let n = span.file.read(&mut buf[written..written + want])?;
            if n == 0 {
                // The file is shorter than its declared logical length: stop here.
                break;
            }
            self.pos += n as u64;
            written += n;
        }
        Ok(written)
    }
}
impl Seek for SliceReader {
    /// Moves the logical position. Seeking past the end is allowed; later reads then
    /// return 0 bytes.
    ///
    /// # Errors
    /// `InvalidInput` if the resulting position would be negative or would not fit
    /// in a `u64`.
    fn seek(&mut self, from: SeekFrom) -> std::io::Result<u64> {
        // i128 holds every combination of a u64 base and an i64 delta without overflow.
        let target: i128 = match from {
            SeekFrom::Start(n) => i128::from(n),
            SeekFrom::End(n) => i128::from(self.total) + i128::from(n),
            SeekFrom::Current(n) => i128::from(self.pos) + i128::from(n),
        };
        // A checked conversion rejects both directions; a bare `as u64` would wrap
        // positions above u64::MAX silently.
        let pos = u64::try_from(target).map_err(|_| {
            let reason = if target < 0 {
                "seek before start of archive"
            } else {
                "seek position overflows u64"
            };
            std::io::Error::new(std::io::ErrorKind::InvalidInput, reason)
        })?;
        self.pos = pos;
        Ok(pos)
    }
}
impl DarReader<SliceReader> {
pub fn open_slices(basename: &Path) -> Result<Self, DarError> {
let parent = basename
.parent()
.filter(|p| !p.as_os_str().is_empty())
.unwrap_or_else(|| Path::new("."));
let stem = basename
.file_name()
.and_then(|s| s.to_str())
.ok_or_else(|| DarError::Corrupt("invalid slice basename".into()))?;
let mut paths = Vec::new();
let mut n = 1u64;
loop {
let p = parent.join(format!("{stem}.{n}.dar"));
if !p.exists() {
break;
}
paths.push(p);
n += 1;
}
if paths.is_empty() {
return Err(DarError::Corrupt(format!(
"no slices found for basename {}",
basename.display()
)));
}
DarReader::open(SliceReader::open(&paths)?)
}
}
/// Reads the NUL-terminated version field and packs it into one integer.
///
/// Each byte is reduced by ASCII `'0'` (saturating at 0). The first two bytes form the
/// major version as `b0 * 256 + b1`; an optional third byte is the fix level. The
/// result is `major * 256 + fix`, so `value >> 8` gives the major version.
/// Returns `u32::MAX` when the field cannot be read or has fewer than two bytes.
fn read_format_value<R: Read>(r: &mut R) -> u32 {
    let digit = |byte: u8| u32::from(byte.saturating_sub(b'0'));
    let raw = read_nul_bytes(r).unwrap_or_default();
    match raw.as_slice() {
        [hi, lo, rest @ ..] => {
            let major = digit(*hi) * 256 + digit(*lo);
            let fix = rest.first().map_or(0, |&b| digit(b));
            major * 256 + fix
        }
        _ => u32::MAX,
    }
}
/// Reads the variable-length header flag field.
///
/// Bytes are consumed while their lowest bit is set; that bit is masked off and the
/// remaining bits are accumulated big-endian, eight bits per byte.
///
/// # Errors
/// `DarError::Corrupt` if a further byte arrives once the accumulator's top byte is
/// already in use; `DarError::Io` on read failure.
fn read_header_flags<R: Read>(r: &mut R) -> Result<u64, DarError> {
    let mut acc: u64 = 0;
    loop {
        let byte = read_u8(r)?;
        // Shifting in another byte would push the current top byte out of the u64.
        if acc > 0x00FF_FFFF_FFFF_FFFF {
            return Err(DarError::Corrupt("header flag field too large".into()));
        }
        acc = (acc << 8) | u64::from(byte & 0xFE);
        let more_follows = byte & 0x01 != 0;
        if !more_follows {
            break Ok(acc);
        }
    }
}
/// Best-effort read of the compression block size from the archive header.
///
/// Returns 0 when the value is absent, the format is older than major version 2, the
/// flags include a bit this reader does not handle, the value exceeds
/// `MAX_BLOCK_SIZE`, or anything fails to parse.
fn read_compr_bs<R: Read>(r: &mut R, format_major: u32) -> u64 {
    const INITIAL_OFFSET: u64 = 0x08;
    const HAS_COMPRESS_BS: u64 = 0x0800;
    // Flag bits whose presence makes this reader give up on the block size.
    const COMPLEX: u64 = 0x20 | 0x04 | 0x02 | 0x0400;

    let mut parse = || -> Result<u64, DarError> {
        // A NUL-terminated string precedes the flags.
        skip_nul_string(r)?;
        if format_major < 2 {
            return Ok(0);
        }
        let flags = read_header_flags(r)?;
        let usable = flags & COMPLEX == 0 && flags & HAS_COMPRESS_BS != 0;
        if !usable {
            return Ok(0);
        }
        if flags & INITIAL_OFFSET != 0 {
            // An initial-offset infinint sits in front of the block size; discard it.
            read_infinint(r)?;
        }
        let block_size = read_infinint(r)?;
        Ok(if block_size <= MAX_BLOCK_SIZE {
            block_size
        } else {
            0
        })
    };
    parse().unwrap_or(0)
}
/// Returns `true` when `algo` (case-insensitively) is one of the compression codes this
/// reader treats as compressed: `z`, `y`, `x`, `l`, `j`, `k`, `d` or `q`.
fn is_compressed(algo: u8) -> bool {
    const CODES: &[u8] = b"zyxljkdq";
    let code = algo.to_ascii_lowercase();
    CODES.contains(&code)
}
/// Decompresses a catalogue into memory, failing once the output would exceed
/// `MAX_CATALOGUE_INFLATED` bytes.
fn inflate_catalogue(compressed: &[u8], algo: u8, block_size: u64) -> Result<Vec<u8>, DarError> {
    let mut inflated = Vec::new();
    decode_data(
        compressed,
        algo,
        block_size,
        &mut CapWriter {
            inner: &mut inflated,
            written: 0,
            max: MAX_CATALOGUE_INFLATED,
        },
    )?;
    Ok(inflated)
}
/// Decodes `data` into `out`, choosing between block framing and a plain stream.
///
/// Block framing is used when `block_size` is non-zero or when the algorithm is one of
/// `q`, `l`, `j`, `k` (case-insensitive); everything else is decoded as one stream.
fn decode_data<W: Write>(
    data: &[u8],
    algo: u8,
    block_size: u64,
    out: &mut W,
) -> Result<(), DarError> {
    let block_only_codec = matches!(algo.to_ascii_lowercase(), b'q' | b'l' | b'j' | b'k');
    if block_size == 0 && !block_only_codec {
        return decode_stream(data, algo, out);
    }
    decode_blocks(data, algo, block_size, out)
}
/// Decodes block-framed compressed data into `out`.
///
/// The input is a sequence of records, each a one-byte type followed by an infinint
/// size: type 1 carries `size` bytes of compressed data, type 2 (with size 0) ends the
/// sequence.
///
/// # Errors
/// `DarError::Corrupt` if the end marker is missing, a record type is unknown, a data
/// record is empty or larger than the remaining input, the end marker has a non-zero
/// size, or a block fails to decode.
fn decode_blocks<W: Write>(
    data: &[u8],
    algo: u8,
    block_size: u64,
    out: &mut W,
) -> Result<(), DarError> {
    const H_DATA: u8 = 1;
    const H_EOF: u8 = 2;
    let codec = algo.to_ascii_lowercase();
    let mut input = data;
    // LZ4/LZO decode into a caller-provided buffer: size it from the header's block
    // size when known, otherwise fall back to 256 KiB.
    let mut raw_block_buf: Vec<u8> = if matches!(codec, b'q' | b'l' | b'j' | b'k') {
        let seed = if block_size > 0 {
            block_size.min(MAX_BLOCK_SIZE) as usize
        } else {
            256 * 1024
        };
        vec![0u8; seed]
    } else {
        Vec::new()
    };
    loop {
        let typ = read_u8(&mut input)
            .map_err(|_| DarError::Corrupt("truncated block stream: missing end marker".into()))?;
        let size = read_infinint(&mut input)?;
        match typ {
            H_EOF => {
                if size != 0 {
                    return Err(DarError::Corrupt(
                        "non-zero size on end-of-blocks marker".into(),
                    ));
                }
                return Ok(());
            }
            H_DATA => {
                if size == 0 {
                    return Err(DarError::Corrupt("zero-size compressed block".into()));
                }
                let len = usize::try_from(size)
                    .ok()
                    .filter(|&n| n <= input.len())
                    .ok_or_else(|| {
                        DarError::Corrupt(
                            "compressed block size exceeds remaining input".into(),
                        )
                    })?;
                // The input is already in memory: borrow the block instead of copying it.
                let (block, rest) = input.split_at(len);
                input = rest;
                match codec {
                    b'q' => decode_lz4_block(block, &mut raw_block_buf, out)?,
                    b'l' | b'j' | b'k' => decode_lzo_block(block, &mut raw_block_buf, out)?,
                    _ => decode_stream(block, algo, out)?,
                }
            }
            other => {
                return Err(DarError::Corrupt(format!(
                    "unknown compressed block type {other}"
                )));
            }
        }
    }
}
/// Decompresses one LZ4 block into `buf` and writes the decoded bytes to `out`.
///
/// # Errors
/// `DarError::Corrupt` if decoding fails; `DarError::Io` if writing fails.
fn decode_lz4_block<W: Write>(block: &[u8], buf: &mut [u8], out: &mut W) -> Result<(), DarError> {
    let produced = match lz4_flex::block::decompress_into(block, buf) {
        Ok(len) => len,
        Err(e) => return Err(DarError::Corrupt(format!("lz4 block decode failed: {e}"))),
    };
    out.write_all(&buf[..produced]).map_err(DarError::Io)
}
/// Decompresses one LZO block into `buf` and writes the decoded bytes to `out`.
///
/// # Errors
/// `DarError::Corrupt` if decoding fails; `DarError::Io` if writing fails.
fn decode_lzo_block<W: Write>(block: &[u8], buf: &mut [u8], out: &mut W) -> Result<(), DarError> {
    let produced = match lzo::decompress_into(block, buf) {
        Ok(len) => len,
        Err(e) => return Err(DarError::Corrupt(format!("lzo block decode failed: {e}"))),
    };
    out.write_all(&buf[..produced]).map_err(DarError::Io)
}
/// Writer adaptor that forwards to `inner` but refuses to let the running total
/// exceed `max` bytes. Used to bound decompressed output.
struct CapWriter<'a, W: Write> {
    /// Destination writer.
    inner: &'a mut W,
    /// Bytes forwarded so far.
    written: u64,
    /// Maximum total number of bytes that may be forwarded.
    max: u64,
}

impl<W: Write> Write for CapWriter<'_, W> {
    /// Forwards `data` in full, or fails without writing anything if doing so would
    /// push the total past `max`.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        // checked_add: an overflowing sum must count as "over the cap", not panic
        // (debug) or wrap around below the cap (release).
        let total = self
            .written
            .checked_add(data.len() as u64)
            .filter(|&t| t <= self.max)
            .ok_or_else(|| std::io::Error::other("decompressed data exceeds bound"))?;
        self.inner.write_all(data)?;
        self.written = total;
        Ok(data.len())
    }

    /// Flushes the destination writer.
    fn flush(&mut self) -> std::io::Result<()> {
        self.inner.flush()
    }
}
/// Decodes a single compressed stream from `input` into `out`.
///
/// Supported codes (case-insensitive): `z` (zlib), `y` (bzip2), `x` (xz), `d` (zstd).
/// For xz, the decoder's "Unexpected data after last XZ block" error is tolerated so
/// that bytes following the stream do not fail extraction.
///
/// # Errors
/// `DarError::Corrupt` if the code is not one of the above or decoding fails.
fn decode_stream<R: Read, W: Write>(input: R, algo: u8, out: &mut W) -> Result<(), DarError> {
    /// Builds the uniform "<codec> decode failed: <cause>" error.
    fn failed(codec: &str, cause: impl std::fmt::Display) -> DarError {
        DarError::Corrupt(format!("{codec} decode failed: {cause}"))
    }

    match algo.to_ascii_lowercase() {
        b'z' => {
            let mut decoder = flate2::read::ZlibDecoder::new(input);
            std::io::copy(&mut decoder, out).map_err(|e| failed("zlib", e))?;
        }
        b'y' => {
            let mut decoder = bzip2_rs::DecoderReader::new(input);
            std::io::copy(&mut decoder, out).map_err(|e| failed("bzip2", e))?;
        }
        b'x' => {
            let mut buffered = std::io::BufReader::new(input);
            match lzma_rs::xz_decompress(&mut buffered, out) {
                Ok(()) => {}
                // Trailing bytes after the xz stream are expected here, not an error.
                Err(lzma_rs::error::Error::XzError(ref m))
                    if m == "Unexpected data after last XZ block" => {}
                Err(e) => return Err(failed("xz", e)),
            }
        }
        b'd' => {
            let mut decoder =
                ruzstd::StreamingDecoder::new(input).map_err(|e| failed("zstd", e))?;
            std::io::copy(&mut decoder, out).map_err(|e| failed("zstd", e))?;
        }
        other => {
            return Err(DarError::Corrupt(format!(
                "unrecognised compression codec '{}'",
                other as char
            )));
        }
    }
    Ok(())
}
/// Reads the catalogue offset recorded by the terminator at the end of the stream.
///
/// The terminator is decoded backwards from the last byte: trailing `0xFF` bytes each
/// count 8 bits, then one byte holding a run of leading 1-bits adds its run length.
/// The offset infinint starts `bits * 4` bytes before that final non-`0xFF` byte.
///
/// # Errors
/// `DarError::Corrupt` if the stream is exhausted while walking backwards, the padding
/// exceeds `MAX_BITS`, the terminal byte is malformed, or the computed start would
/// fall before the stream start; plus any error from `read_infinint`.
fn read_terminateur<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
// Each counted bit stands for this many bytes.
const BLOCK_SIZE: u64 = 4;
// Cap on counted 0xFF padding bits, so a stream full of 0xFF is rejected early.
const MAX_BITS: u64 = 4096;
let mut pos = r.seek(SeekFrom::End(0))?;
let mut bits: u64 = 0;
// Walk backwards over 0xFF bytes until the first byte that is not 0xFF.
let terminal = loop {
if pos == 0 {
return Err(DarError::Corrupt("terminator underflows archive".into()));
}
pos -= 1;
r.seek(SeekFrom::Start(pos))?;
let b = read_u8(r)?;
if b == 0xFF {
bits += 8;
if bits > MAX_BITS {
return Err(DarError::Corrupt("terminator padding too long".into()));
}
} else {
break b;
}
};
// The terminal byte must start with at least one 1-bit.
if terminal & 0x80 == 0 {
return Err(DarError::Corrupt(format!(
"invalid terminator byte {terminal:#04x}"
)));
}
// Count the leading 1-bits; a 1 after a 0 means the run is not contiguous.
let mut x = terminal;
while x != 0 {
if x & 0x80 == 0 {
return Err(DarError::Corrupt("malformed terminator bit run".into()));
}
bits += 1;
x <<= 1;
}
let byte_offset = bits * BLOCK_SIZE;
// `pos` is the position of the terminal byte; the infinint lies before it.
let infinint_start = pos
.checked_sub(byte_offset)
.ok_or_else(|| DarError::Corrupt("terminator offset underflows".into()))?;
r.seek(SeekFrom::Start(infinint_start))?;
read_infinint(r)
}
/// Parses catalogue entries from `r` until the top-level directory is closed, the
/// input ends, or an unrecognised entry type is met.
///
/// Returns the entries in catalogue order and a flag that is `true` only when the
/// end-of-directory marker closing the top-level directory was reached.
///
/// # Errors
/// A failure to read an entry's *type byte* ends parsing quietly (the result is then
/// marked incomplete); failures inside an entry are returned as errors.
fn parse_catalog<R: Read + Seek>(
r: &mut R,
format_major: u32,
global_comp: u8,
) -> Result<(Vec<EntryRef>, bool), DarError> {
let mut entries = Vec::new();
// Names of the directories currently open, outermost first.
let mut dir_stack: Vec<Vec<u8>> = Vec::new();
let mut depth: u32 = 0;
let mut complete = false;
loop {
let mut buf = [0u8; 1];
// Running out of input between entries is treated as a truncated catalogue.
match r.read_exact(&mut buf) {
Ok(()) => {}
Err(_) => break,
}
// Only the low five bits select the type; they are mapped into the lowercase ASCII
// range. NOTE(review): the upper bits are ignored here — presumably status flags; confirm.
let entry_type = ((buf[0] & 0x1f) | 0x60) as char;
match entry_type {
// End of directory.
'z' => {
depth = depth.saturating_sub(1);
dir_stack.pop();
if depth == 0 {
complete = true; break;
}
}
// Directory.
'd' => {
let name = read_nul_bytes(r)?;
let inode = read_inode_base(r, format_major)?;
if format_major >= 9 && (inode.flags >> 4) & 1 != 0 {
skip_fsa(r)?;
}
let is_root = depth == 0;
depth += 1;
// A directory named "<ROOT>" is neither recorded nor pushed; the first directory
// (depth 0) is pushed but not recorded as an entry.
// NOTE(review): a nested directory literally named "<ROOT>" would not be pushed,
// yet its closing marker still pops the stack — confirm this cannot occur.
if name != b"<ROOT>" {
let path = join_path(&dir_stack, &name);
if !is_root {
entries.push(meta_entry(path, EntryKind::Directory, &inode, None));
}
dir_stack.push(name);
}
}
// Regular file.
'f' => {
let name = read_nul_bytes(r)?;
let inode = read_inode_base(r, format_major)?;
if format_major >= 9 && (inode.flags >> 4) & 1 != 0 {
skip_fsa(r)?;
}
let FileFields {
size,
archive_offset,
stored_size,
compression,
crc,
} = read_file_fields(r, format_major, global_comp)?;
entries.push(EntryRef {
path: join_path(&dir_stack, &name),
kind: EntryKind::File,
size,
uid: inode.uid,
gid: inode.gid,
mode: inode.mode,
atime: inode.atime,
mtime: inode.mtime,
ctime: inode.ctime,
symlink_target: None,
archive_offset,
stored_size,
compression,
crc,
});
}
// Symbolic link: the inode is followed by the NUL-terminated target.
'l' => {
let name = read_nul_bytes(r)?;
let inode = read_inode_base(r, format_major)?;
if format_major >= 9 && (inode.flags >> 4) & 1 != 0 {
skip_fsa(r)?;
}
let target = read_nul_bytes(r)?;
let path = join_path(&dir_stack, &name);
entries.push(meta_entry(path, EntryKind::Symlink, &inode, Some(target)));
}
// Named pipe or socket: metadata only.
'p' | 's' => {
let name = read_nul_bytes(r)?;
let inode = read_inode_base(r, format_major)?;
if format_major >= 9 && (inode.flags >> 4) & 1 != 0 {
skip_fsa(r)?;
}
let kind = if entry_type == 'p' {
EntryKind::NamedPipe
} else {
EntryKind::Socket
};
entries.push(meta_entry(join_path(&dir_stack, &name), kind, &inode, None));
}
// Any other type stops parsing; what was gathered so far is returned as incomplete.
_ => break, }
}
Ok((entries, complete))
}
/// File-specific catalogue fields returned by `read_file_fields`.
struct FileFields {
// Declared size of the file.
size: u64,
// Data offset as recorded in the catalogue.
archive_offset: u64,
// Number of bytes stored in the archive for this file.
stored_size: u64,
// Compression algorithm byte that applies to this file.
compression: u8,
// Stored checksum, if any.
crc: Option<Vec<u8>>,
}
/// Reads the file-specific part of a catalogue entry.
///
/// Layout by format major version:
/// * `>= 8`: size, offset, stored size, one status byte (ignored), per-file
///   compression byte, variable-width CRC;
/// * `2..=7`: size, offset, stored size, 2-byte CRC; compression is `global_comp`;
/// * `< 2`: size, offset only; stored size equals size, compression is `global_comp`,
///   no CRC.
///
/// For versions `<= 7` a stored size of 0 is replaced by the declared size.
fn read_file_fields<R: Read + Seek>(
    r: &mut R,
    format_major: u32,
    global_comp: u8,
) -> Result<FileFields, DarError> {
    let size = read_infinint(r)?;
    let archive_offset = read_infinint(r)?;
    let (declared_stored, compression, crc) = match format_major {
        0 | 1 => (size, global_comp, None),
        2..=7 => {
            let stored = read_infinint(r)?;
            let mut fixed_crc = [0u8; 2];
            r.read_exact(&mut fixed_crc)?;
            (stored, global_comp, Some(fixed_crc.to_vec()))
        }
        _ => {
            let stored = read_infinint(r)?;
            let _file_data_status = read_u8(r)?;
            let per_file_comp = read_u8(r)?;
            let crc = read_crc(r)?;
            (stored, per_file_comp, crc)
        }
    };
    let stored_size = if format_major <= 7 && declared_stored == 0 {
        size
    } else {
        declared_stored
    };
    Ok(FileFields {
        size,
        archive_offset,
        stored_size,
        compression,
        crc,
    })
}
/// Reads a variable-width CRC: an infinint width followed by that many bytes.
///
/// Returns `None` when the width is 0.
///
/// # Errors
/// `DarError::Corrupt` if the width exceeds `MAX_CRC_SIZE`; `DarError::Io` on read
/// failure.
fn read_crc<R: Read>(r: &mut R) -> Result<Option<Vec<u8>>, DarError> {
    match read_infinint(r)? {
        0 => Ok(None),
        width if width > MAX_CRC_SIZE => Err(DarError::Corrupt(format!(
            "CRC width {width} exceeds {MAX_CRC_SIZE}-byte bound"
        ))),
        width => {
            // Bounded by MAX_CRC_SIZE above, so the cast cannot truncate.
            let mut crc = vec![0u8; width as usize];
            r.read_exact(&mut crc)?;
            Ok(Some(crc))
        }
    }
}
/// Computes the `width`-byte checksum of `data`: byte `i` of the input is XOR-ed into
/// accumulator byte `i % width`.
///
/// A `width` of 0 yields an empty checksum instead of panicking on the modulo.
fn dar_crc(data: &[u8], width: usize) -> Vec<u8> {
    let mut acc = vec![0u8; width];
    if width == 0 {
        return acc;
    }
    // Each `width`-sized chunk lines up with the accumulator, so zipping them is the
    // same as indexing with `i % width`.
    for chunk in data.chunks(width) {
        for (slot, &byte) in acc.iter_mut().zip(chunk) {
            *slot ^= byte;
        }
    }
    acc
}
/// Formats `bytes` as a lowercase hexadecimal string, two digits per byte.
fn to_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        let high = usize::from(byte >> 4);
        let low = usize::from(byte & 0x0f);
        hex.push(char::from(DIGITS[high]));
        hex.push(char::from(DIGITS[low]));
    }
    hex
}
/// Builds a path by appending `/` to every component in `stack` and then adding `name`.
fn join_path(stack: &[Vec<u8>], name: &[u8]) -> Vec<u8> {
    stack
        .iter()
        .flat_map(|component| component.iter().copied().chain(std::iter::once(b'/')))
        .chain(name.iter().copied())
        .collect()
}
fn meta_entry(
path: Vec<u8>,
kind: EntryKind,
inode: &Inode,
symlink_target: Option<Vec<u8>>,
) -> EntryRef {
EntryRef {
path,
kind,
size: 0,
uid: inode.uid,
gid: inode.gid,
mode: inode.mode,
atime: inode.atime,
mtime: inode.mtime,
ctime: inode.ctime,
symlink_target,
archive_offset: 0,
stored_size: 0,
compression: b'n',
crc: None,
}
}
/// Reads an infinint that fits in 64 bits.
///
/// The first byte must have exactly one bit set; the number of leading zeros before
/// that bit, plus one, is the number of 4-byte groups that follow. Only one group
/// (`0x80`) or two groups (`0x40`) are accepted. The data bytes are big-endian.
///
/// # Errors
/// `DarError::Corrupt` for a zero first byte, a first byte with several bits set, or
/// an encoding wider than 8 data bytes; `DarError::Io` on read failure.
fn read_infinint<R: Read>(r: &mut R) -> Result<u64, DarError> {
    let terminal = read_u8(r)?;
    let groups = match terminal {
        0x00 => {
            return Err(DarError::Corrupt(
                "infinint exceeds 64-bit range (multi-group encoding)".into(),
            ));
        }
        t if !t.is_power_of_two() => {
            return Err(DarError::Corrupt(format!(
                "invalid infinint terminal: {terminal:#04x}"
            )));
        }
        t => t.leading_zeros() + 1,
    };
    if groups > 2 {
        return Err(DarError::Corrupt(format!(
            "infinint exceeds 64-bit range: terminal {terminal:#04x} implies {} bytes",
            groups * 4
        )));
    }
    (0..groups * 4).try_fold(0u64, |value, _| -> Result<u64, DarError> {
        Ok((value << 8) | u64::from(read_u8(r)?))
    })
}
/// Reads exactly one byte.
///
/// # Errors
/// `DarError::Io` if the byte cannot be read (including end of input).
fn read_u8<R: Read>(r: &mut R) -> Result<u8, DarError> {
    let mut byte = 0u8;
    r.read_exact(std::slice::from_mut(&mut byte))?;
    Ok(byte)
}
/// Longest NUL-terminated string (excluding the terminator) accepted by
/// `read_nul_bytes` and `skip_nul_string`.
const MAX_NUL_STRING: usize = 64 * 1024;
/// Reads bytes up to (and consuming) the next NUL and returns them without the NUL.
///
/// # Errors
/// `DarError::Corrupt` if more than `MAX_NUL_STRING` bytes precede the NUL;
/// `DarError::Io` if the input ends first.
fn read_nul_bytes<R: Read>(r: &mut R) -> Result<Vec<u8>, DarError> {
    let mut collected = Vec::new();
    loop {
        match read_u8(r)? {
            0 => return Ok(collected),
            _ if collected.len() >= MAX_NUL_STRING => {
                return Err(DarError::Corrupt(format!(
                    "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
                )));
            }
            byte => collected.push(byte),
        }
    }
}
/// Consumes bytes up to and including the next NUL without storing them.
///
/// # Errors
/// `DarError::Corrupt` if more than `MAX_NUL_STRING` bytes precede the NUL;
/// `DarError::Io` if the input ends first.
fn skip_nul_string<R: Read>(r: &mut R) -> Result<(), DarError> {
    // Up to MAX_NUL_STRING content bytes plus the terminator may be consumed.
    for _ in 0..=MAX_NUL_STRING {
        if read_u8(r)? == 0 {
            return Ok(());
        }
    }
    Err(DarError::Corrupt(format!(
        "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
    )))
}
/// Advances the stream by `n` bytes with a relative seek; does nothing when `n` is 0.
///
/// # Errors
/// `DarError::Corrupt` if `n` does not fit in an `i64`; `DarError::Io` if the seek
/// fails.
fn skip<R: Seek>(r: &mut R, n: u64) -> Result<(), DarError> {
    if n == 0 {
        return Ok(());
    }
    let Ok(delta) = i64::try_from(n) else {
        return Err(DarError::Corrupt(format!(
            "skip length {n} exceeds seekable range"
        )));
    };
    r.seek(SeekFrom::Current(delta))?;
    Ok(())
}
/// Reads a timestamp and returns its whole-seconds part.
///
/// Before format 9 the timestamp is a single infinint. From format 9 on, a unit byte
/// comes first, then the seconds; when the unit is `b'n'` or `b'u'` one more infinint
/// follows, which is read and discarded.
fn read_timestamp<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<i64, DarError> {
    let has_fraction = if format_major >= 9 {
        matches!(read_u8(r)?, b'n' | b'u')
    } else {
        false
    };
    // The cast mirrors the stored value: anything above i64::MAX wraps to negative.
    let secs = read_infinint(r)? as i64;
    if has_fraction {
        read_infinint(r)?;
    }
    Ok(secs)
}
/// Reads a big-endian `u16`.
///
/// # Errors
/// `DarError::Io` if two bytes cannot be read.
fn read_u16<R: Read>(r: &mut R) -> Result<u16, DarError> {
    let mut raw = [0u8; 2];
    r.read_exact(&mut raw)?;
    let [high, low] = raw;
    Ok((u16::from(high) << 8) | u16::from(low))
}
/// Metadata common to all catalogue entry types, as read by `read_inode_base`.
struct Inode {
// Flag byte (0 for format major < 2); bit 4 marks extra data that follows the inode.
flags: u8,
// Owner user id.
uid: u64,
// Owner group id.
gid: u64,
// 16-bit mode field.
mode: u16,
// Access time, whole seconds.
atime: i64,
// Modification time, whole seconds.
mtime: i64,
// Change time, whole seconds; only present for format major >= 8.
ctime: Option<i64>,
}
/// Reads the inode fields shared by all entry types.
///
/// Layout, in order:
/// * flag byte (format major >= 2 only, else 0);
/// * uid and gid — 16-bit each up to format 7, infinints afterwards;
/// * 16-bit mode;
/// * atime, mtime, and (format major >= 8) ctime;
/// * for format major >= 9 with flag bit 4 set, two further infinints that are read
///   and discarded.
fn read_inode_base<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<Inode, DarError> {
    let flags = match format_major {
        0 | 1 => 0,
        _ => read_u8(r)?,
    };
    let wide_ids = format_major > 7;
    let uid = if wide_ids {
        read_infinint(r)?
    } else {
        u64::from(read_u16(r)?)
    };
    let gid = if wide_ids {
        read_infinint(r)?
    } else {
        u64::from(read_u16(r)?)
    };
    let mode = read_u16(r)?;
    let atime = read_timestamp(r, format_major)?;
    let mtime = read_timestamp(r, format_major)?;
    let ctime = match format_major {
        0..=7 => None,
        _ => Some(read_timestamp(r, format_major)?),
    };
    let has_extra = (flags >> 4) & 1 != 0;
    if format_major >= 9 && has_extra {
        for _ in 0..2 {
            read_infinint(r)?;
        }
    }
    Ok(Inode {
        flags,
        uid,
        gid,
        mode,
        atime,
        mtime,
        ctime,
    })
}
/// Skips one block consisting of an infinint (ignored), an infinint length, and that
/// many bytes.
fn skip_fsa<R: Read + Seek>(r: &mut R) -> Result<(), DarError> {
    read_infinint(r)?;
    let payload_len = read_infinint(r)?;
    skip(r, payload_len)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn slicereader_stops_on_truncated_slice() {
use std::io::Read;
let path = std::env::temp_dir().join(format!("dar_ms_trunc_{}.bin", std::process::id()));
std::fs::write(&path, [1u8, 2, 3, 4]).unwrap();
let mut sr = SliceReader {
slices: vec![SliceSpan {
file: File::open(&path).unwrap(),
file_data_start: 0,
logical_start: 0,
logical_len: 100, }],
pos: 0,
total: 100,
};
let mut buf = [0u8; 50];
assert_eq!(sr.read(&mut buf).unwrap(), 4);
assert_eq!(&buf[..4], &[1, 2, 3, 4]);
let _ = std::fs::remove_file(&path);
}
#[test]
fn infinint_decodes_value() {
let data = [0x80u8, 0x00, 0x00, 0x00, 0x0d];
assert_eq!(read_infinint(&mut Cursor::new(&data[..])).unwrap(), 13);
}
#[test]
fn infinint_bad_preamble_returns_corrupt() {
let data = [0x03u8, 0x00, 0x00, 0x00, 0x00];
let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
assert!(matches!(&err, DarError::Corrupt(_)));
}
#[test]
fn infinint_truncated_returns_io() {
let err = read_infinint(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap_err();
assert!(matches!(err, DarError::Io(_)));
}
#[test]
fn infinint_0x40_preamble_reads_8_data_bytes() {
let mut data = vec![0x40u8];
data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x5d, 0x15, 0x93, 0x31]);
assert_eq!(
read_infinint(&mut Cursor::new(data)).unwrap(),
0x5d15_9331u64
);
}
#[test]
fn infinint_multi_bit_terminal_returns_corrupt() {
let data = [0x60u8, 0x00, 0x00, 0x00, 0x00];
let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
assert!(matches!(&err, DarError::Corrupt(_)));
}
#[test]
fn read_u8_reads_single_byte() {
assert_eq!(read_u8(&mut Cursor::new(&[0x42u8][..])).unwrap(), 0x42);
}
#[test]
fn read_u8_eof_returns_io() {
let err = read_u8(&mut Cursor::new(&[][..])).unwrap_err();
assert!(matches!(err, DarError::Io(_)));
}
#[test]
fn nul_bytes_reads_until_nul() {
let data = b"hello\x00world";
assert_eq!(
read_nul_bytes(&mut Cursor::new(&data[..])).unwrap(),
b"hello"
);
}
#[test]
fn nul_bytes_preserves_non_utf8() {
let data = [0xFF, 0x80, 0x00];
assert_eq!(
read_nul_bytes(&mut Cursor::new(&data[..])).unwrap(),
vec![0xFF, 0x80]
);
}
/// A short input that ends without a NUL terminator produces `DarError::Io`.
#[test]
fn nul_bytes_eof_before_nul_returns_io() {
    let unterminated = b"no-nul".to_vec();
    let mut cursor = Cursor::new(unterminated);
    let err = read_nul_bytes(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Io(_)));
}
/// After skipping `"skip\0"` the cursor sits just past the NUL, at offset 5.
#[test]
fn skip_nul_string_advances_past_nul() {
    let input = b"skip\x00rest".to_vec();
    let mut cursor = Cursor::new(input);
    skip_nul_string(&mut cursor).unwrap();
    let landed_at = cursor.position();
    assert_eq!(landed_at, 5);
}
/// Skipping a string that never reaches a NUL before end of input yields
/// `DarError::Io`.
#[test]
fn skip_nul_string_eof_returns_io() {
    let unterminated = b"no-nul".to_vec();
    let mut cursor = Cursor::new(unterminated);
    let err = skip_nul_string(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Io(_)));
}
/// A three-byte body cannot hold a catalogue marker; `find_catalogue` must
/// fail with `Corrupt`, carrying either of the two accepted messages.
#[test]
fn find_catalogue_body_too_short() {
    let accepted = ["archive body too short", "seqt_catalogue not found"];
    let label = [0u8; 10];
    let body = [0x01u8, 0x02, 0x03];
    let mut cursor = Cursor::new(body.as_slice());
    let err = find_catalogue(&mut cursor, &label).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if accepted.contains(&s.as_str())));
}
/// When the six marker bytes open the body, `find_catalogue` reports a match
/// via the escape sequence and leaves the cursor right after it (offset 6).
#[test]
fn find_catalogue_escape_at_start() {
    let mut body: [u8; 7] = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43, 0xFF];
    let label = [0u8; 10];
    let mut cursor = Cursor::new(&mut body[..]);
    let via_escape = find_catalogue(&mut cursor, &label).unwrap();
    assert!(via_escape);
    let landed_at = cursor.position();
    assert_eq!(landed_at, 6);
}
/// A body of ten zero bytes contains neither the marker nor the all-`0xFF`
/// label, so the search fails with "seqt_catalogue not found".
#[test]
fn find_catalogue_escape_not_found() {
    let body = [0u8; 10];
    let label = [0xFFu8; 10];
    let mut cursor = Cursor::new(body.as_slice());
    let err = find_catalogue(&mut cursor, &label).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
}
/// Without a marker in the body, the ten-byte label is matched instead: the
/// result is `false` (not via escape) and the cursor ends after the label,
/// at offset 5 + 10 = 15.
#[test]
fn find_catalogue_label_fallback() {
    let label: [u8; 10] = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A];
    let padding = [0x00u8; 5];
    let body: Vec<u8> = [&padding[..], &label[..]].concat();
    let mut cursor = Cursor::new(body);
    let via_escape = find_catalogue(&mut cursor, &label).unwrap();
    assert!(!via_escape);
    assert_eq!(cursor.position(), 15);
}
/// Skipping zero bytes leaves the cursor position untouched.
#[test]
fn skip_zero_does_not_move_cursor() {
    let buffer = vec![0xFFu8; 10];
    let mut cursor = Cursor::new(buffer);
    skip(&mut cursor, 0).unwrap();
    let landed_at = cursor.position();
    assert_eq!(landed_at, 0);
}
/// Skipping seven bytes moves the cursor from 0 to 7.
#[test]
fn skip_n_advances_cursor() {
    let buffer = vec![0xFFu8; 10];
    let mut cursor = Cursor::new(buffer);
    skip(&mut cursor, 7).unwrap();
    let landed_at = cursor.position();
    assert_eq!(landed_at, 7);
}
/// With flag bit 4 clear, `read_inode_base` consumes exactly 31 bytes:
/// 1 flag byte + two 5-byte infinints + 2 bytes + three (1 + 5)-byte groups.
/// A trailing `0xFF` sentinel must be left unread.
///
/// NOTE(review): the groups presumably map to uid, gid, permission bits and
/// three dated fields; confirm against `read_inode_base`.
#[test]
fn inode_base_bit4_clear_reads_31_bytes() {
    // Five-byte infinint: 0x80 preamble followed by four zero data bytes.
    const ZERO_INFININT: [u8; 5] = [0x80, 0x00, 0x00, 0x00, 0x00];

    let mut record = vec![0x00u8];
    record.extend_from_slice(&ZERO_INFININT);
    record.extend_from_slice(&ZERO_INFININT);
    record.extend_from_slice(&[0x00, 0x00]);
    for _ in 0..3 {
        record.push(b's');
        record.extend_from_slice(&ZERO_INFININT);
    }
    // Sentinel: must not be consumed.
    record.push(0xFF);

    let mut cursor = Cursor::new(record);
    let base = read_inode_base(&mut cursor, 11).unwrap();
    assert_eq!(base.flags, 0x00);
    assert_eq!(cursor.position(), 31);
}
/// With flag bit 4 set (`0x10`), `read_inode_base` consumes two further
/// 5-byte infinints on top of the 31-byte base layout, for 41 bytes in total.
/// A trailing `0xFF` sentinel must be left unread.
///
/// NOTE(review): the meaning of the two extra infinints is not visible from
/// this test; confirm against `read_inode_base`.
#[test]
fn inode_base_bit4_set_reads_41_bytes() {
    // Five-byte infinint: 0x80 preamble followed by four zero data bytes.
    const ZERO_INFININT: [u8; 5] = [0x80, 0x00, 0x00, 0x00, 0x00];

    let mut record = vec![0x10u8];
    record.extend_from_slice(&ZERO_INFININT);
    record.extend_from_slice(&ZERO_INFININT);
    record.extend_from_slice(&[0x00, 0x00]);
    for _ in 0..3 {
        record.push(b's');
        record.extend_from_slice(&ZERO_INFININT);
    }
    // Extra fields that are only read when bit 4 is set.
    record.extend_from_slice(&ZERO_INFININT);
    record.extend_from_slice(&ZERO_INFININT);
    // Sentinel: must not be consumed.
    record.push(0xFF);

    let mut cursor = Cursor::new(record);
    let base = read_inode_base(&mut cursor, 11).unwrap();
    assert_eq!(base.flags, 0x10);
    assert_eq!(cursor.position(), 41);
}
/// `skip_fsa` must step over two 5-byte infinints (values 5 and 3) plus three
/// payload bytes, 13 bytes in all, and stop before the `0xFF` sentinel.
///
/// NOTE(review): presumably the second infinint (3) is the payload length;
/// confirm against `skip_fsa`.
#[test]
fn skip_fsa_consumes_tag_size_and_data() {
    let record: Vec<u8> = vec![
        0x80, 0x00, 0x00, 0x00, 0x05, // first infinint
        0x80, 0x00, 0x00, 0x00, 0x03, // second infinint
        0xAA, 0xBB, 0xCC, // payload
        0xFF, // sentinel, must stay unread
    ];
    let mut cursor = Cursor::new(record);
    skip_fsa(&mut cursor).unwrap();
    assert_eq!(cursor.position(), 13);
}
/// An infinint whose first byte is `0x00` (ahead of a `0x80` byte) is
/// rejected as `DarError::Corrupt`.
#[test]
fn infinint_leading_zero_byte_returns_corrupt() {
    let encoded: [u8; 6] = [0x00, 0x80, 0x00, 0x00, 0x00, 0x00];
    let mut cursor = Cursor::new(encoded.as_slice());
    let err = read_infinint(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// A `0x20` preamble followed by twelve `0x11` bytes describes a value that
/// cannot fit in a `u64`; the reader must answer with `DarError::Corrupt`.
#[test]
fn infinint_12_byte_group_exceeds_u64_returns_corrupt() {
    let encoded: Vec<u8> = std::iter::once(0x20u8)
        .chain(std::iter::repeat(0x11u8).take(12))
        .collect();
    let mut cursor = Cursor::new(encoded);
    let err = read_infinint(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// 4096 zero bytes never supply a usable preamble; the reader must return
/// `DarError::Corrupt` instead of consuming the input indefinitely.
#[test]
fn infinint_all_zero_run_returns_corrupt_without_hanging() {
    let zeros = vec![0u8; 4096];
    let mut cursor = Cursor::new(zeros);
    let err = read_infinint(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// 200 000 bytes with no NUL must be rejected as `Corrupt`; per the test
/// name, `read_nul_bytes` applies a length bound before reaching end of input.
#[test]
fn nul_bytes_without_terminator_is_length_bounded() {
    let unterminated = vec![b'A'; 200_000];
    let mut cursor = Cursor::new(unterminated);
    let err = read_nul_bytes(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// 200 000 bytes with no NUL must be rejected as `Corrupt`; per the test
/// name, `skip_nul_string` applies a length bound before reaching end of input.
#[test]
fn skip_nul_string_without_terminator_is_length_bounded() {
    let unterminated = vec![b'A'; 200_000];
    let mut cursor = Cursor::new(unterminated);
    let err = skip_nul_string(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// A skip distance of 2^63 (one past `i64::MAX`) is refused with `Corrupt`,
/// and the cursor stays at its starting offset of 32.
#[test]
fn skip_value_above_i64_max_returns_corrupt() {
    let start: u64 = 32;
    let mut cursor = Cursor::new(vec![0u8; 64]);
    cursor.set_position(start);
    let err = skip(&mut cursor, 0x8000_0000_0000_0000).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    assert_eq!(cursor.position(), start);
}
/// A terminateur made of the infinint `80 00 00 00 18` followed by
/// `00 00 00 c0` yields the offset 24 (`0x18`).
#[test]
fn terminateur_reads_catalogue_offset() {
    let trailer: Vec<u8> = vec![
        0x80, 0x00, 0x00, 0x00, 0x18, // infinint carrying 24
        0x00, 0x00, 0x00, 0xc0, // trailing bytes of the terminateur
    ];
    let mut cursor = Cursor::new(trailer);
    let offset = read_terminateur(&mut cursor).unwrap();
    assert_eq!(offset, 24);
}
/// Four `0xFF` bytes and nothing else must be reported as `Corrupt`.
#[test]
fn terminateur_all_ff_underflows_returns_corrupt() {
    let trailer = vec![0xFFu8; 4];
    let mut cursor = Cursor::new(trailer);
    let err = read_terminateur(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// A run of 600 `0xFF` bytes must be reported as `Corrupt`.
#[test]
fn terminateur_excessive_ff_padding_returns_corrupt() {
    let trailer = vec![0xFFu8; 600];
    let mut cursor = Cursor::new(trailer);
    let err = read_terminateur(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// A final byte of `0x01` (only a low bit set) after the offset infinint is
/// not a valid terminator and must produce `Corrupt`.
#[test]
fn terminateur_low_terminator_byte_returns_corrupt() {
    let trailer: Vec<u8> = vec![
        0x80, 0x00, 0x00, 0x00, 0x18, // offset infinint
        0x01, // invalid terminator byte
    ];
    let mut cursor = Cursor::new(trailer);
    let err = read_terminateur(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// A final byte of `0xA0` (`1010_0000`, high bits not contiguous) must
/// produce `Corrupt`.
#[test]
fn terminateur_noncontiguous_high_bits_returns_corrupt() {
    let trailer: Vec<u8> = vec![
        0x80, 0x00, 0x00, 0x00, 0x18, // offset infinint
        0xA0, // invalid terminator byte
    ];
    let mut cursor = Cursor::new(trailer);
    let err = read_terminateur(&mut cursor).unwrap_err();
    assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
}
/// With a limit argument of 4, `find_catalogue_within` must still locate a
/// marker that starts at offset 2 of a 20-byte body, leaving the cursor
/// immediately after the marker.
///
/// NOTE(review): the third argument is presumably the size of the initial
/// search window; confirm against `find_catalogue_within`.
#[test]
fn find_catalogue_falls_back_to_full_scan() {
    let lead_in = [0x11u8, 0x22];
    let filler = [0x33u8; 12];
    let body: Vec<u8> = [&lead_in[..], &SEQT_CATALOGUE[..], &filler[..]].concat();
    let label = [0u8; 10];
    let mut cursor = Cursor::new(body);
    let via_escape = find_catalogue_within(&mut cursor, &label, 4).unwrap();
    assert!(via_escape);
    let expected_end = (lead_in.len() + SEQT_CATALOGUE.len()) as u64;
    assert_eq!(cursor.position(), expected_end);
}
/// Sixteen `0x11` bytes hold neither the marker nor the all-`0xAB` label, so
/// the search fails with "seqt_catalogue not found".
#[test]
fn find_catalogue_full_scan_miss_returns_not_found() {
    let body = vec![0x11u8; 16];
    let label = [0xABu8; 10];
    let mut cursor = Cursor::new(body);
    let err = find_catalogue_within(&mut cursor, &label, 4).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
}
/// Starting the search with the cursor already at end of input (offset 6 of a
/// 6-byte buffer) must fail with "archive body too short".
#[test]
fn find_catalogue_body_too_short_when_origin_at_eof() {
    let mut cursor = Cursor::new(vec![0u8; 6]);
    let end_of_input = cursor.get_ref().len() as u64;
    cursor.seek(SeekFrom::Start(end_of_input)).unwrap();
    let label = [0u8; 10];
    let err = find_catalogue(&mut cursor, &label).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s == "archive body too short"));
}
/// Inflating 4096 zlib-compressed bytes into a `CapWriter` limited to 16
/// bytes must abort with a `Corrupt` error mentioning "exceeds bound".
#[test]
fn decode_stream_caps_decompression_bomb() {
    use flate2::{write::ZlibEncoder, Compression};
    use std::io::Write;

    // Build a zlib stream whose inflated size is far above the cap.
    let blob = {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(&[0u8; 4096]).unwrap();
        encoder.finish().unwrap()
    };

    let mut sink = Vec::new();
    let mut capped = CapWriter {
        inner: &mut sink,
        written: 0,
        max: 16,
    };
    let outcome = decode_stream(&blob[..], b'z', &mut capped);
    let err = outcome.unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("exceeds bound")));
}
/// Input that is not a zlib stream, decoded with codec `b'z'`, fails with a
/// `Corrupt` error containing "zlib decode failed".
#[test]
fn decode_stream_rejects_malformed_zlib() {
    let garbage: &[u8] = b"not a zlib stream at all";
    let mut out: Vec<u8> = Vec::new();
    let err = decode_stream(garbage, b'z', &mut out).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zlib decode failed")));
}
/// Input that is not a bzip2 stream, decoded with codec `b'y'`, fails with a
/// `Corrupt` error containing "bzip2 decode failed".
#[test]
fn decode_stream_rejects_malformed_bzip2() {
    let garbage: &[u8] = b"not a bzip2 stream";
    let mut out: Vec<u8> = Vec::new();
    let err = decode_stream(garbage, b'y', &mut out).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("bzip2 decode failed")));
}
/// Input that is not an xz stream, decoded with codec `b'x'`, fails with a
/// `Corrupt` error containing "xz decode failed".
#[test]
fn decode_stream_rejects_malformed_xz() {
    let garbage: &[u8] = b"this is not an xz stream";
    let mut out: Vec<u8> = Vec::new();
    let err = decode_stream(garbage, b'x', &mut out).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("xz decode failed")));
}
/// Input that is not a zstd frame, decoded with codec `b'd'`, fails with a
/// `Corrupt` error containing "zstd decode failed".
#[test]
fn decode_stream_rejects_malformed_zstd() {
    let garbage: &[u8] = b"not a zstd frame";
    let mut out: Vec<u8> = Vec::new();
    let err = decode_stream(garbage, b'd', &mut out).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zstd decode failed")));
}
/// A codec byte of `b'?'` is refused with a `Corrupt` error containing
/// "unrecognised compression codec".
#[test]
fn decode_stream_rejects_unknown_codec() {
    let payload: &[u8] = b"data";
    let mut out: Vec<u8> = Vec::new();
    let err = decode_stream(payload, b'?', &mut out).unwrap_err();
    assert!(
        matches!(&err, DarError::Corrupt(s) if s.contains("unrecognised compression codec"))
    );
}
/// Covers three shapes of the header flag field:
/// * a single byte `0x10` reads back as `0x10`;
/// * the pair `09 08` reads back as `0x0808`;
/// * nine `0xFF` bytes are refused with "flag field too large".
#[test]
fn header_flags_single_two_byte_and_overlong() {
    let mut one_byte: &[u8] = &[0x10];
    assert_eq!(read_header_flags(&mut one_byte).unwrap(), 0x10);

    let mut two_bytes: &[u8] = &[0x09, 0x08];
    assert_eq!(read_header_flags(&mut two_bytes).unwrap(), 0x0808);

    let mut overlong: &[u8] = &[0xFF; 9];
    let err = read_header_flags(&mut overlong).unwrap_err();
    assert!(matches!(&err, DarError::Corrupt(s) if s.contains("flag field too large")));
}
/// For edition 1 `read_compr_bs` returns 0 for this input.
#[test]
fn compr_bs_edition_one_is_zero() {
    let mut header: &[u8] = b"cmdline\x00rest";
    let block_size = read_compr_bs(&mut header, 1);
    assert_eq!(block_size, 0);
}
/// For edition 11, `read_compr_bs` returns 42: the value of the second
/// infinint, which follows one `0x00` byte, the two bytes `09 08`, and a
/// first infinint of zero.
///
/// NOTE(review): the leading fields are presumably a NUL-terminated string,
/// the header flags and the initial offset; confirm against `read_compr_bs`.
#[test]
fn compr_bs_read_after_initial_offset() {
    let header: Vec<u8> = vec![
        0x00, // single zero byte
        0x09, 0x08, // two-byte field
        0x80, 0, 0, 0, 0, // first infinint (0)
        0x80, 0, 0, 0, 42, // second infinint (42)
    ];
    let block_size = read_compr_bs(&mut header.as_slice(), 11);
    assert_eq!(block_size, 42);
}
/// `CapWriter` with `max: 4` forwards a 2-byte write to the inner sink, then
/// rejects a 3-byte write that would pass the limit; the sink keeps only the
/// bytes accepted before the failure.
#[test]
fn cap_writer_forwards_within_bound_and_fails_over() {
    use std::io::Write;

    let mut sink = Vec::new();
    let mut capped = CapWriter {
        inner: &mut sink,
        written: 0,
        max: 4,
    };

    // Within the bound: accepted and forwarded.
    let accepted = capped.write(b"ab").unwrap();
    assert_eq!(accepted, 2);
    capped.flush().unwrap();

    // 2 + 3 > 4: the write is refused.
    let overflow = capped.write(b"cde").unwrap_err();
    assert_eq!(overflow.to_string(), "decompressed data exceeds bound");

    assert_eq!(sink, b"ab");
}
}