use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::format;
use alloc::string::String;
use alloc::vec::Vec;
use core::str::Utf8Error;
use thiserror::Error;
use zerocopy::FromBytes;
use crate::{
EntryType, GnuExtSparseHeader, Header, HeaderError, PaxError, PaxExtensions, SparseEntry,
HEADER_SIZE, PAX_GID, PAX_GNAME, PAX_GNU_SPARSE_MAJOR, PAX_GNU_SPARSE_MAP,
PAX_GNU_SPARSE_MINOR, PAX_GNU_SPARSE_NAME, PAX_GNU_SPARSE_NUMBYTES, PAX_GNU_SPARSE_OFFSET,
PAX_GNU_SPARSE_REALSIZE, PAX_GNU_SPARSE_SIZE, PAX_LINKPATH, PAX_MTIME, PAX_PATH,
PAX_SCHILY_XATTR, PAX_SIZE, PAX_UID, PAX_UNAME,
};
/// Resource limits enforced by the parser while it processes untrusted archive data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Limits {
    /// Maximum number of bytes of extension metadata (GNU long name/link and PAX
    /// header content) accepted for a single entry, and the maximum size of a
    /// global PAX header.
    pub max_metadata_size: u32,
    /// Maximum accepted path / link-target length in bytes; `None` disables the check.
    pub max_path_len: Option<u32>,
    /// Maximum number of extension headers that may be pending before an entry.
    pub max_pending_entries: usize,
    /// Maximum number of entries accepted in a sparse map.
    pub max_sparse_entries: usize,
}

impl Default for Limits {
    /// Conservative defaults: 1 MiB of metadata, no path length limit,
    /// 16 pending extension headers and 10 000 sparse map entries.
    fn default() -> Self {
        const ONE_MIB: u32 = 1024 * 1024;
        Self {
            max_path_len: None,
            max_sparse_entries: 10_000,
            max_pending_entries: 16,
            max_metadata_size: ONE_MIB,
        }
    }
}
impl Limits {
    /// Creates the default limits; identical to [`Limits::default`].
    #[must_use]
    pub fn new() -> Self {
        Default::default()
    }

    /// Creates very relaxed limits: metadata size and pending-entry count are
    /// effectively unbounded, path length is unchecked and up to one million
    /// sparse map entries are accepted.
    #[must_use]
    pub fn permissive() -> Self {
        Self {
            max_path_len: None,
            max_sparse_entries: 1_000_000,
            max_pending_entries: usize::MAX,
            max_metadata_size: u32::MAX,
        }
    }

    /// Validates a path length (in bytes) against `max_path_len`.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::PathTooLong`] when a limit is configured and `len`
    /// exceeds it. Always succeeds when `max_path_len` is `None`.
    pub fn check_path_len(&self, len: usize) -> Result<()> {
        match self.max_path_len {
            Some(limit) if len > limit as usize => Err(ParseError::PathTooLong { len, limit }),
            _ => Ok(()),
        }
    }
}
/// Errors reported by the tar stream parser.
#[derive(Debug, Error)]
pub enum ParseError {
/// An I/O error; only available with the `std` feature. Not constructed by the
/// parser code visible in this file section.
#[cfg(feature = "std")]
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// A header field could not be decoded or the header failed validation.
#[error("header error: {0}")]
Header(#[from] HeaderError),
/// A PAX extended header record was malformed.
#[error("PAX error: {0}")]
Pax(#[from] PaxError),
/// A PAX record key was not valid UTF-8.
#[error("invalid UTF-8 in PAX key: {0}")]
InvalidUtf8(#[from] Utf8Error),
/// A path or link target was longer than `Limits::max_path_len`.
#[error("path exceeds limit: {len} bytes > {limit} bytes")]
PathTooLong {
/// Observed length in bytes.
len: usize,
/// Configured limit in bytes.
limit: u32,
},
/// Extension metadata was larger than `Limits::max_metadata_size`.
#[error("metadata exceeds limit: {size} bytes > {limit} bytes")]
MetadataTooLarge {
/// Metadata size in bytes that triggered the error.
size: u64,
/// Configured limit in bytes.
limit: u32,
},
/// Two GNU long-name extensions preceded the same entry.
#[error("duplicate GNU long name entry")]
DuplicateGnuLongName,
/// Two GNU long-link extensions preceded the same entry.
#[error("duplicate GNU long link entry")]
DuplicateGnuLongLink,
/// Two PAX extended headers preceded the same entry.
#[error("duplicate PAX extended header")]
DuplicatePaxHeader,
/// The end-of-archive marker was reached while extension metadata was still pending.
#[error("metadata entries without a following actual entry")]
OrphanedMetadata,
/// More extension headers were pending than `Limits::max_pending_entries` allows.
#[error("too many pending metadata entries: {count} > {limit}")]
TooManyPendingEntries {
/// Number of pending extension headers.
count: usize,
/// Configured limit.
limit: usize,
},
/// A sparse map had more entries than `Limits::max_sparse_entries` allows.
#[error("too many sparse entries: {count} > {limit}")]
TooManySparseEntries {
/// Number of sparse entries encountered (or declared).
count: usize,
/// Configured limit.
limit: usize,
},
/// The entry type is GNU sparse but the header could not be viewed as a GNU header.
#[error("sparse entry type but header is not GNU format")]
SparseNotGnu,
/// A PAX-encoded sparse map was missing or malformed; the payload describes why.
#[error("invalid PAX sparse map: {0}")]
InvalidPaxSparseMap(Cow<'static, str>),
/// A PAX record value could not be parsed for the given key.
#[error("invalid PAX {key} value: {value:?}")]
InvalidPaxValue {
/// The PAX key whose value was rejected.
key: &'static str,
/// The offending value, or `"<non-UTF-8>"` when it could not be decoded.
value: Cow<'static, str>,
},
/// The resolved entry path was empty and empty paths are not allowed.
#[error("entry has empty path")]
EmptyPath,
/// The entry size cannot be rounded up to a whole block or added to the header
/// size without overflowing.
#[error("invalid entry size: {0}")]
InvalidSize(u64),
/// The input ended unexpectedly. Not constructed by the parser code visible in
/// this file section.
#[error("unexpected EOF at position {pos}")]
UnexpectedEof {
/// Stream position at which the end of input was hit.
pos: u64,
},
}
/// Result alias whose error type is [`ParseError`].
pub type Result<T> = core::result::Result<T, ParseError>;
/// Outcome of a single [`Parser::parse`] call.
///
/// All `consumed` counters are byte counts relative to the start of the input
/// slice passed to `parse`.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ParseEvent<'a> {
/// The input is too short to make progress; retry with at least `min_bytes` bytes.
NeedData {
/// Minimum input length, in bytes, required for the next attempt.
min_bytes: usize,
},
/// A regular (non-sparse) entry header was parsed.
Entry {
/// Header bytes consumed: any preceding extension headers with their padded
/// content, plus the entry's own header block. The entry's content is not included.
consumed: usize,
/// The decoded entry.
entry: ParsedEntry<'a>,
},
/// A sparse entry header was parsed.
SparseEntry {
/// Header bytes consumed, including any sparse map blocks that follow the header.
consumed: usize,
/// The decoded entry.
entry: ParsedEntry<'a>,
/// The sparse map as (offset, length) entries.
sparse_map: Vec<SparseEntry>,
/// Size of the file once expanded, as declared by the sparse metadata.
real_size: u64,
},
/// A global PAX extended header was parsed.
GlobalExtensions {
/// Bytes consumed: the header block plus its padded content.
consumed: usize,
/// The unpadded PAX record data.
pax_data: &'a [u8],
},
/// The end-of-archive marker (two all-zero blocks) was reached, or the parser
/// had already finished.
End {
/// Bytes consumed up to and including the end marker (zero if already done).
consumed: usize,
},
}
impl<'a> ParseEvent<'a> {
    /// Shifts the event's byte counter by `n` bytes that were consumed before the
    /// sub-slice the event was produced from.
    ///
    /// `NeedData::min_bytes` is shifted as well, so the request stays relative to
    /// the caller's original input. The addition saturates instead of overflowing.
    fn add_consumed(mut self, n: usize) -> Self {
        let counter = match &mut self {
            ParseEvent::NeedData { min_bytes } => min_bytes,
            ParseEvent::Entry { consumed, .. }
            | ParseEvent::SparseEntry { consumed, .. }
            | ParseEvent::GlobalExtensions { consumed, .. }
            | ParseEvent::End { consumed } => consumed,
        };
        *counter = counter.saturating_add(n);
        self
    }
}
/// A fully resolved archive entry: header fields with GNU and PAX overrides applied.
#[derive(Debug)]
pub struct ParsedEntry<'a> {
/// The raw header block of the entry itself.
pub header: &'a Header,
/// Entry type taken from the header.
pub entry_type: EntryType,
/// Entry path as raw bytes: the header path (with prefix), or the override from
/// a GNU long name, PAX `path`, or GNU sparse name record.
pub path: Cow<'a, [u8]>,
/// Link target as raw bytes, if non-empty (header value, GNU long link, or PAX
/// `linkpath`).
pub link_target: Option<Cow<'a, [u8]>>,
/// Mode bits from the header.
pub mode: u32,
/// Owner user id (header value or PAX override).
pub uid: u64,
/// Owner group id (header value or PAX override).
pub gid: u64,
/// Modification time in whole seconds (header value or the integer part of the
/// PAX override).
pub mtime: u64,
/// Size of the entry content in bytes (header value or PAX override).
pub size: u64,
/// Owner user name, if non-empty.
pub uname: Option<Cow<'a, [u8]>>,
/// Owner group name, if non-empty.
pub gname: Option<Cow<'a, [u8]>>,
/// Device major number from the header, if present.
pub dev_major: Option<u32>,
/// Device minor number from the header, if present.
pub dev_minor: Option<u32>,
/// Extended attributes as (name, value) pairs, collected from PAX records whose
/// key starts with the `PAX_SCHILY_XATTR` prefix (the prefix is stripped).
#[allow(clippy::type_complexity)]
pub xattrs: Vec<(Cow<'a, [u8]>, Cow<'a, [u8]>)>,
/// Raw content of the PAX extended header that preceded this entry, if any.
pub pax: Option<&'a [u8]>,
}
impl<'a> ParsedEntry<'a> {
    /// Returns the path as text, replacing invalid UTF-8 sequences with U+FFFD.
    #[must_use]
    pub fn path_lossy(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(self.path.as_ref())
    }

    /// Returns the link target as text (lossy UTF-8), or `None` if there is none.
    #[must_use]
    pub fn link_target_lossy(&self) -> Option<Cow<'_, str>> {
        self.link_target.as_deref().map(String::from_utf8_lossy)
    }

    /// Whether the entry type reports itself as a regular file.
    #[must_use]
    pub fn is_file(&self) -> bool {
        self.entry_type.is_file()
    }

    /// Whether the entry type reports itself as a directory.
    #[must_use]
    pub fn is_dir(&self) -> bool {
        self.entry_type.is_dir()
    }

    /// Whether the entry type reports itself as a symbolic link.
    #[must_use]
    pub fn is_symlink(&self) -> bool {
        self.entry_type.is_symlink()
    }

    /// Whether the entry type reports itself as a hard link.
    #[must_use]
    pub fn is_hard_link(&self) -> bool {
        self.entry_type.is_hard_link()
    }

    /// Returns `size` rounded up to the next multiple of the block size
    /// (`HEADER_SIZE`).
    ///
    /// NOTE(review): `size` may come from an unvalidated PAX `size` record; for
    /// values within one block of `u64::MAX` the rounding overflows.
    #[must_use]
    pub fn padded_size(&self) -> u64 {
        let block = HEADER_SIZE as u64;
        self.size.next_multiple_of(block)
    }
}
/// Parser state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
/// The parser expects a header block next.
ReadHeader,
/// The end-of-archive marker has been seen; further parsing yields `End`.
Done,
}
/// Kind of per-entry extension header that precedes the entry it modifies.
#[derive(Debug, Clone, Copy)]
enum ExtensionKind {
/// GNU long-name extension (content overrides the entry path).
GnuLongName,
/// GNU long-link extension (content overrides the link target).
GnuLongLink,
/// PAX extended header.
Pax,
}
/// Extension data collected from headers that precede the entry currently being
/// parsed. All slices borrow from the parser input.
#[derive(Debug, Default, Clone, Copy)]
struct PendingMetadata<'a> {
/// Content of a pending GNU long-name extension (trailing NUL removed).
gnu_long_name: Option<&'a [u8]>,
/// Content of a pending GNU long-link extension (trailing NUL removed).
gnu_long_link: Option<&'a [u8]>,
/// Raw content of a pending PAX extended header.
pax_extensions: Option<&'a [u8]>,
/// Number of extension headers accumulated so far.
count: usize,
/// Sum of the unpadded sizes of the accumulated extension headers, in bytes.
metadata_size: u64,
}
/// Sparse-file information gathered before the entry event is emitted.
struct SparseContext {
/// The decoded sparse map.
sparse_map: Vec<SparseEntry>,
/// Declared size of the expanded file.
real_size: u64,
/// Bytes following the entry header that were consumed while reading the sparse
/// map (added to the event's `consumed` count).
ext_consumed: usize,
}
impl PendingMetadata<'_> {
    /// Returns `true` when no long name, long link or PAX data is pending.
    ///
    /// Only the three data slots are inspected; `count` and `metadata_size` are
    /// not consulted.
    fn is_empty(&self) -> bool {
        matches!(
            (self.gnu_long_name, self.gnu_long_link, self.pax_extensions),
            (None, None, None)
        )
    }
}
/// Scans PAX records for the GNU sparse format version.
///
/// Returns `Ok(Some((major, minor)))` once both the `PAX_GNU_SPARSE_MAJOR` and
/// `PAX_GNU_SPARSE_MINOR` records have been parsed, and `Ok(None)` if either is
/// absent. Scanning stops as soon as both are known.
///
/// # Errors
///
/// A malformed record always fails. A non-UTF-8 key, a non-UTF-8 version value
/// or a non-numeric version value fails unless `ignore_errors` is set, in which
/// case the record is skipped.
fn pax_sparse_version(pax: &[u8], ignore_errors: bool) -> Result<Option<(u64, u64)>> {
    let mut major: Option<u64> = None;
    let mut minor: Option<u64> = None;

    for record in PaxExtensions::new(pax) {
        let record = record?;
        let key = match record.key() {
            Ok(key) => key,
            Err(_) if ignore_errors => continue,
            Err(err) => return Err(ParseError::from(err)),
        };

        // Choose which component this record fills in; every other key is irrelevant.
        let (slot, name): (&mut Option<u64>, &'static str) = match key {
            PAX_GNU_SPARSE_MAJOR => (&mut major, PAX_GNU_SPARSE_MAJOR),
            PAX_GNU_SPARSE_MINOR => (&mut minor, PAX_GNU_SPARSE_MINOR),
            _ => continue,
        };

        let text = match record.value() {
            Ok(text) => text,
            Err(_) if ignore_errors => continue,
            Err(_) => {
                return Err(ParseError::InvalidPaxValue {
                    key: name,
                    value: Cow::Borrowed("<non-UTF-8>"),
                })
            }
        };

        match text.parse::<u64>() {
            Ok(number) => *slot = Some(number),
            Err(_) if ignore_errors => {}
            Err(_) => {
                return Err(ParseError::InvalidPaxValue {
                    key: name,
                    value: text.to_owned().into(),
                })
            }
        }

        if major.is_some() && minor.is_some() {
            break;
        }
    }

    Ok(major.zip(minor))
}
/// Incremental tar header parser.
///
/// Each call to `parse` inspects the start of the supplied buffer and reports one
/// [`ParseEvent`]; the parser itself only keeps configuration and a done flag.
#[derive(Debug)]
pub struct Parser {
/// Resource limits applied while parsing.
limits: Limits,
/// Whether the end-of-archive marker has been reached.
state: State,
/// Accept entries whose resolved path is empty instead of returning an error.
allow_empty_path: bool,
/// Verify each header's checksum before using it.
verify_checksums: bool,
/// Skip PAX records whose key or value cannot be decoded instead of failing.
ignore_pax_errors: bool,
}
impl Parser {
/// Creates a parser with the given limits.
///
/// Checksum verification starts enabled; empty paths are rejected and PAX
/// decoding errors are reported rather than ignored.
#[must_use]
pub fn new(limits: Limits) -> Self {
    Self {
        state: State::ReadHeader,
        verify_checksums: true,
        allow_empty_path: false,
        ignore_pax_errors: false,
        limits,
    }
}
/// Sets whether entries with an empty resolved path are accepted instead of
/// failing with [`ParseError::EmptyPath`].
pub fn set_allow_empty_path(&mut self, allow: bool) {
self.allow_empty_path = allow;
}
/// Sets whether header checksums are verified before a header is interpreted.
pub fn set_verify_checksums(&mut self, verify: bool) {
self.verify_checksums = verify;
}
/// Sets whether PAX records with undecodable or unparsable values are skipped
/// instead of causing a parse error.
pub fn set_ignore_pax_errors(&mut self, ignore: bool) {
self.ignore_pax_errors = ignore;
}
/// Creates a parser that uses [`Limits::default`].
#[must_use]
pub fn with_defaults() -> Self {
    let limits = Limits::default();
    Self::new(limits)
}
/// Returns the limits this parser enforces.
#[must_use]
pub fn limits(&self) -> &Limits {
&self.limits
}
/// Returns `true` once the end-of-archive marker has been parsed.
#[must_use]
pub fn is_done(&self) -> bool {
    matches!(self.state, State::Done)
}
/// Parses the next item from the start of `input`.
///
/// Once the end of the archive has been reached this keeps returning
/// `ParseEvent::End { consumed: 0 }`. Otherwise `input` must begin at a header
/// block; the returned event reports how many bytes were consumed or how many
/// are needed.
///
/// # Errors
///
/// Returns a [`ParseError`] when the header data is invalid or a limit is exceeded.
pub fn parse<'a>(&mut self, input: &'a [u8]) -> Result<ParseEvent<'a>> {
    if self.state == State::Done {
        return Ok(ParseEvent::End { consumed: 0 });
    }
    self.parse_header(input, PendingMetadata::default())
}
/// Parses the header block at the start of `input` and dispatches on its type.
///
/// `slices` carries extension data gathered from headers that came before this
/// one. Extension headers recurse into this function for the block that follows
/// them, so a single call resolves a whole "extensions + entry" group.
///
/// Handling, in order:
/// * fewer than `HEADER_SIZE` bytes: `NeedData`;
/// * an all-zero block followed by a second all-zero block: end of archive
///   (an error if extension data is still pending); a lone zero block is skipped;
/// * GNU long name / long link / PAX headers (only for GNU or ustar headers):
///   accumulated via `handle_extension`;
/// * global PAX header: emitted as `GlobalExtensions`;
/// * GNU sparse header (only for GNU or ustar headers): `handle_gnu_sparse`;
/// * anything else: a regular entry, or a PAX sparse 1.0 entry when the pending
///   PAX data declares that version.
fn parse_header<'a>(
    &mut self,
    input: &'a [u8],
    slices: PendingMetadata<'a>,
) -> Result<ParseEvent<'a>> {
    const TWO_BLOCKS: usize = 2 * HEADER_SIZE;
    let is_zeroed = |block: &[u8]| block.iter().all(|&b| b == 0);

    if input.len() < HEADER_SIZE {
        return Ok(ParseEvent::NeedData {
            min_bytes: HEADER_SIZE,
        });
    }
    let header_bytes: &[u8; HEADER_SIZE] = input[..HEADER_SIZE]
        .try_into()
        .expect("already checked input.len() >= HEADER_SIZE");

    if is_zeroed(&header_bytes[..]) {
        // The end marker is two zero blocks, so the second one must be visible
        // before a decision can be made.
        if input.len() < TWO_BLOCKS {
            return Ok(ParseEvent::NeedData {
                min_bytes: TWO_BLOCKS,
            });
        }
        if !is_zeroed(&input[HEADER_SIZE..TWO_BLOCKS]) {
            // A single zero block: skip it and continue with the next block.
            return self
                .parse_header(&input[HEADER_SIZE..], slices)
                .map(|event| event.add_consumed(HEADER_SIZE));
        }
        self.state = State::Done;
        return if slices.is_empty() {
            Ok(ParseEvent::End {
                consumed: TWO_BLOCKS,
            })
        } else {
            Err(ParseError::OrphanedMetadata)
        };
    }

    let pending_limit = self.limits.max_pending_entries;
    if slices.count > pending_limit {
        return Err(ParseError::TooManyPendingEntries {
            count: slices.count,
            limit: pending_limit,
        });
    }

    let header = Header::from_bytes(header_bytes);
    if self.verify_checksums {
        header.verify_checksum()?;
    }
    let entry_type = header.entry_type();
    let size = header.entry_size()?;
    let padded_size = size
        .checked_next_multiple_of(HEADER_SIZE as u64)
        .ok_or(ParseError::InvalidSize(size))?;
    let is_extension_format = header.is_gnu() || header.is_ustar();

    // Per-entry extension headers are only honoured for GNU/ustar headers.
    let extension_kind = match entry_type {
        EntryType::GnuLongName => Some(ExtensionKind::GnuLongName),
        EntryType::GnuLongLink => Some(ExtensionKind::GnuLongLink),
        EntryType::XHeader => Some(ExtensionKind::Pax),
        _ => None,
    };
    if is_extension_format {
        if let Some(kind) = extension_kind {
            return self.handle_extension(input, size, padded_size, kind, slices);
        }
    }

    if matches!(entry_type, EntryType::XGlobalHeader) {
        let limit = self.limits.max_metadata_size;
        if size > limit as u64 {
            return Err(ParseError::MetadataTooLarge { size, limit });
        }
        let total_size = (HEADER_SIZE as u64)
            .checked_add(padded_size)
            .ok_or(ParseError::InvalidSize(size))?;
        if (input.len() as u64) < total_size {
            return Ok(ParseEvent::NeedData {
                min_bytes: total_size as usize,
            });
        }
        let content_end = HEADER_SIZE + size as usize;
        return Ok(ParseEvent::GlobalExtensions {
            consumed: total_size as usize,
            pax_data: &input[HEADER_SIZE..content_end],
        });
    }

    if is_extension_format && matches!(entry_type, EntryType::GnuSparse) {
        return self.handle_gnu_sparse(input, header, size, slices);
    }

    // Ordinary entry; pending PAX data may still mark it as a sparse 1.0 entry.
    let sparse_version = match slices.pax_extensions {
        Some(pax) => pax_sparse_version(pax, self.ignore_pax_errors)?,
        None => None,
    };
    if matches!(sparse_version, Some((1, 0))) {
        self.handle_pax_sparse_v1(input, header, size, slices)
    } else {
        self.emit_entry(header, size, None, slices)
    }
}
/// Records the content of a per-entry extension header (GNU long name, GNU long
/// link or PAX extended header) and continues parsing with the block after it.
///
/// `input` starts at the extension's own header block; `size` and `padded_size`
/// are its content size without and with block padding. The event produced for
/// the following header has `HEADER_SIZE + padded_size` added to its byte counter.
///
/// # Errors
///
/// * `DuplicateGnuLongName` / `DuplicateGnuLongLink` / `DuplicatePaxHeader` if an
///   extension of the same kind is already pending;
/// * `MetadataTooLarge` if the accumulated extension size exceeds
///   `max_metadata_size`;
/// * `InvalidSize` if the total block size overflows or cannot be addressed on
///   this platform;
/// * `PathTooLong` if a GNU long name/link exceeds `max_path_len`.
fn handle_extension<'a>(
    &mut self,
    input: &'a [u8],
    size: u64,
    padded_size: u64,
    kind: ExtensionKind,
    slices: PendingMetadata<'a>,
) -> Result<ParseEvent<'a>> {
    let (already_pending, duplicate_error) = match kind {
        ExtensionKind::GnuLongName => (
            slices.gnu_long_name.is_some(),
            ParseError::DuplicateGnuLongName,
        ),
        ExtensionKind::GnuLongLink => (
            slices.gnu_long_link.is_some(),
            ParseError::DuplicateGnuLongLink,
        ),
        ExtensionKind::Pax => (
            slices.pax_extensions.is_some(),
            ParseError::DuplicatePaxHeader,
        ),
    };
    if already_pending {
        return Err(duplicate_error);
    }

    // `size` comes straight from an untrusted header. A plain `+` could overflow
    // (panic in debug builds, wrap in release builds and slip past the limit), so
    // saturate: a saturated total is always above the u32 limit.
    let new_metadata_size = slices.metadata_size.saturating_add(size);
    if new_metadata_size > self.limits.max_metadata_size as u64 {
        return Err(ParseError::MetadataTooLarge {
            size: new_metadata_size,
            limit: self.limits.max_metadata_size,
        });
    }

    let total_size = (HEADER_SIZE as u64)
        .checked_add(padded_size)
        .ok_or(ParseError::InvalidSize(size))?;
    // Convert once, checked, so a 32-bit target never silently truncates the
    // block length used for slicing and for the NeedData request.
    let total_len = usize::try_from(total_size).map_err(|_| ParseError::InvalidSize(size))?;
    if input.len() < total_len {
        return Ok(ParseEvent::NeedData {
            min_bytes: total_len,
        });
    }

    let content_start = HEADER_SIZE;
    // Lossless: `size <= padded_size < total_size`, and `total_size` fits in usize.
    let content_end = content_start + size as usize;
    let mut data: &'a [u8] = &input[content_start..content_end];
    if matches!(
        kind,
        ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink
    ) {
        // GNU long names are stored NUL-terminated; drop one trailing NUL.
        if let Some(trimmed) = data.strip_suffix(&[0]) {
            data = trimmed;
        }
        self.limits.check_path_len(data.len())?;
    }

    let mut new_slices = PendingMetadata {
        count: slices.count + 1,
        metadata_size: new_metadata_size,
        ..slices
    };
    match kind {
        ExtensionKind::GnuLongName => new_slices.gnu_long_name = Some(data),
        ExtensionKind::GnuLongLink => new_slices.gnu_long_link = Some(data),
        ExtensionKind::Pax => new_slices.pax_extensions = Some(data),
    }

    self.parse_header(&input[total_len..], new_slices)
        .map(|event| event.add_consumed(total_len))
}
/// Handles an entry whose pending PAX data declares GNU sparse format 1.0.
///
/// In this format the sparse map is stored at the start of the entry's data as
/// newline-terminated decimal numbers: the entry count, then an offset and a
/// length per entry, padded to a whole block. `input` starts at the entry's
/// header block and `size` is the entry size from the header, which includes the
/// map blocks.
///
/// The emitted entry's size is `size` minus the map blocks, and the map blocks
/// are added to the event's `consumed` count.
///
/// # Errors
///
/// * `InvalidPaxSparseMap` if the PAX data or the real-size record is missing, a
///   map line is not a decimal number, a map line has no terminating newline
///   within a full block of buffered data, or the map is larger than the entry;
/// * `InvalidPaxValue` for an undecodable real-size value (unless PAX errors are
///   ignored);
/// * `TooManySparseEntries` if the declared entry count exceeds the limit.
fn handle_pax_sparse_v1<'a>(
    &mut self,
    input: &'a [u8],
    header: &'a Header,
    size: u64,
    slices: PendingMetadata<'a>,
) -> Result<ParseEvent<'a>> {
    let pax = slices
        .pax_extensions
        .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
            "missing PAX extensions",
        )))?;
    let ignore_errors = self.ignore_pax_errors;

    // Pull the expanded size and the real file name out of the PAX records.
    let mut real_size = None;
    let mut sparse_name = None;
    for ext in PaxExtensions::new(pax) {
        let ext = ext?;
        let key = match ext.key() {
            Ok(k) => k,
            Err(_) if ignore_errors => continue,
            Err(e) => return Err(ParseError::from(e)),
        };
        match key {
            PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
                let s = match ext.value() {
                    Ok(s) => s,
                    Err(_) if ignore_errors => continue,
                    Err(_) => {
                        return Err(ParseError::InvalidPaxValue {
                            key: PAX_GNU_SPARSE_REALSIZE,
                            value: Cow::Borrowed("<non-UTF-8>"),
                        })
                    }
                };
                match s.parse::<u64>() {
                    Ok(v) => real_size = Some(v),
                    Err(_) if ignore_errors => {}
                    Err(_) => {
                        return Err(ParseError::InvalidPaxValue {
                            key: PAX_GNU_SPARSE_REALSIZE,
                            value: s.to_owned().into(),
                        })
                    }
                }
            }
            PAX_GNU_SPARSE_NAME => {
                sparse_name = Some(ext.value_bytes());
            }
            _ => {}
        }
    }
    let real_size = real_size.ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
        "missing GNU.sparse.realsize",
    )))?;

    let data_start = HEADER_SIZE;
    let data = &input[data_start..];
    let mut pos = 0usize;

    // Reads one newline-terminated decimal number starting at `*pos`.
    // `None` means the line is not complete yet and more input is needed.
    let read_line = |data: &[u8], pos: &mut usize| -> Option<Result<u64>> {
        let remaining = &data[*pos..];
        let nl = match remaining.iter().position(|&b| b == b'\n') {
            Some(nl) => nl,
            // A whole block without a newline can never become a valid decimal
            // line. Failing here also guarantees that every NeedData request
            // below asks for strictly more bytes than were supplied, so callers
            // cannot spin without making progress.
            None if remaining.len() >= HEADER_SIZE => {
                return Some(Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
                    "sparse map line too long",
                ))))
            }
            None => return None,
        };
        let line = &remaining[..nl];
        *pos += nl + 1;
        let s = match core::str::from_utf8(line) {
            Ok(s) => s,
            Err(_) => {
                return Some(Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
                    "non-UTF8 in sparse map",
                ))))
            }
        };
        match s.parse::<u64>() {
            Ok(v) => Some(Ok(v)),
            Err(_) => Some(Err(ParseError::InvalidPaxSparseMap(
                format!("invalid decimal: {s:?}").into(),
            ))),
        }
    };
    // Request for one more block beyond the current read position.
    let need_more = |pos: usize| ParseEvent::NeedData {
        min_bytes: data_start + pos + HEADER_SIZE,
    };

    let num_entries = match read_line(data, &mut pos) {
        Some(r) => r?,
        None => return Ok(need_more(pos)),
    };
    // Compare in u64 so a huge declared count cannot be truncated into range on
    // 32-bit targets before it is checked.
    let limit = self.limits.max_sparse_entries;
    if num_entries > limit as u64 {
        return Err(ParseError::TooManySparseEntries {
            count: usize::try_from(num_entries).unwrap_or(usize::MAX),
            limit,
        });
    }
    // Lossless: `num_entries <= limit`, and `limit` is a usize.
    let expected = num_entries as usize;

    // Cap the pre-allocation; the count is attacker-controlled.
    let mut sparse_map = Vec::with_capacity(expected.min(1024));
    for _ in 0..expected {
        let offset = match read_line(data, &mut pos) {
            Some(r) => r?,
            None => return Ok(need_more(pos)),
        };
        let length = match read_line(data, &mut pos) {
            Some(r) => r?,
            None => return Ok(need_more(pos)),
        };
        sparse_map.push(SparseEntry { offset, length });
    }

    // The map occupies whole blocks; the file data starts after the padding.
    let map_size = pos.next_multiple_of(HEADER_SIZE);
    if data.len() < map_size {
        return Ok(ParseEvent::NeedData {
            min_bytes: data_start + map_size,
        });
    }
    let content_size =
        size.checked_sub(map_size as u64)
            .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
                "sparse map prefix larger than entry size",
            )))?;

    let sparse_ctx = SparseContext {
        sparse_map,
        real_size,
        ext_consumed: map_size,
    };
    // The sparse name record carries the real file name; pass it on as the path
    // override.
    let slices = match sparse_name {
        Some(name) => PendingMetadata {
            gnu_long_name: Some(name),
            ..slices
        },
        None => slices,
    };
    self.emit_entry(header, content_size, Some(sparse_ctx), slices)
}
/// Handles an old-style GNU sparse entry.
///
/// The sparse map starts with the descriptors embedded in the GNU header and,
/// when the header is flagged as extended, continues in extension blocks that
/// directly follow the header. Those extension blocks are counted in the event's
/// `consumed` value.
///
/// # Errors
///
/// * `SparseNotGnu` if the header cannot be viewed as a GNU header;
/// * `TooManySparseEntries` if the map grows beyond `max_sparse_entries`;
/// * header decoding errors for the real size or any descriptor.
fn handle_gnu_sparse<'a>(
    &mut self,
    input: &'a [u8],
    header: &'a Header,
    size: u64,
    slices: PendingMetadata<'a>,
) -> Result<ParseEvent<'a>> {
    let limit = self.limits.max_sparse_entries;
    let gnu = header.try_as_gnu().ok_or(ParseError::SparseNotGnu)?;
    let real_size = gnu.real_size()?;

    // Descriptors stored in the header itself; an empty one ends the list.
    let mut sparse_map = Vec::new();
    for descriptor in &gnu.sparse {
        if descriptor.is_empty() {
            break;
        }
        sparse_map.push(descriptor.to_sparse_entry()?);
    }

    let mut ext_consumed = 0usize;
    if gnu.is_extended() {
        // End offset (within `input`) of the last block consumed so far.
        let mut block_end = HEADER_SIZE;
        loop {
            let block_start = block_end;
            block_end += HEADER_SIZE;
            if input.len() < block_end {
                return Ok(ParseEvent::NeedData {
                    min_bytes: block_end,
                });
            }
            let ext_bytes: &[u8; HEADER_SIZE] = input[block_start..block_end]
                .try_into()
                .expect("checked length");
            let ext = GnuExtSparseHeader::ref_from_bytes(ext_bytes)
                .expect("GnuExtSparseHeader is 512 bytes");

            for descriptor in &ext.sparse {
                if descriptor.is_empty() {
                    break;
                }
                if sparse_map.len() >= limit {
                    return Err(ParseError::TooManySparseEntries {
                        count: sparse_map.len() + 1,
                        limit,
                    });
                }
                sparse_map.push(descriptor.to_sparse_entry()?);
            }

            ext_consumed += HEADER_SIZE;
            if !ext.is_extended() {
                break;
            }
        }
    }

    // Covers the header-embedded descriptors, which are not checked one by one.
    if sparse_map.len() > limit {
        return Err(ParseError::TooManySparseEntries {
            count: sparse_map.len(),
            limit,
        });
    }

    let context = SparseContext {
        sparse_map,
        real_size,
        ext_consumed,
    };
    self.emit_entry(header, size, Some(context), slices)
}
/// Builds the final entry event from a header plus any pending extension data.
///
/// Values are resolved in this order, later sources overriding earlier ones:
/// header fields (path joined with the prefix, if any), GNU long name / long
/// link, then PAX records in the order they appear. A GNU sparse name record, if
/// present, takes precedence for the path.
///
/// `size` is the content size to report unless a PAX `size` record overrides it.
/// `sparse` carries a sparse map decoded by the caller; if it is `None` but the
/// PAX records contain a sparse map (offset/numbytes pairs or a map record), a
/// `SparseEntry` is emitted from those instead.
///
/// # Errors
///
/// * header decoding errors (uid, gid, mtime, mode, device numbers);
/// * `Pax` for malformed PAX records;
/// * `InvalidUtf8`, `InvalidPaxValue`, `InvalidPaxSparseMap` for undecodable PAX
///   keys/values (keys and most values are skipped instead when PAX errors are
///   ignored);
/// * `TooManySparseEntries`, `PathTooLong`, `EmptyPath` when limits or path rules
///   are violated.
fn emit_entry<'a>(
    &mut self,
    header: &'a Header,
    size: u64,
    sparse: Option<SparseContext>,
    slices: PendingMetadata<'a>,
) -> Result<ParseEvent<'a>> {
    let mut path: Cow<'a, [u8]> = Cow::Borrowed(header.path_bytes());
    let mut link_target: Option<Cow<'a, [u8]>> = None;
    let mut uid = header.uid()?;
    let mut gid = header.gid()?;
    let mut mtime = header.mtime()?;
    let mut entry_size = size;
    let mut xattrs = Vec::new();
    let mut uname: Option<Cow<'a, [u8]>> = header
        .username()
        .filter(|b| !b.is_empty())
        .map(Cow::Borrowed);
    let mut gname: Option<Cow<'a, [u8]>> = header
        .groupname()
        .filter(|b| !b.is_empty())
        .map(Cow::Borrowed);

    // A non-empty prefix is joined to the header path with '/'.
    if let Some(prefix) = header.prefix() {
        if !prefix.is_empty() {
            let mut full_path = prefix.to_vec();
            full_path.push(b'/');
            full_path.extend_from_slice(header.path_bytes());
            path = Cow::Owned(full_path);
        }
    }

    if let Some(long_name) = slices.gnu_long_name {
        path = Cow::Borrowed(long_name);
    }
    if let Some(long_link) = slices.gnu_long_link {
        link_target = Some(Cow::Borrowed(long_link));
    } else {
        let header_link = header.link_name_bytes();
        if !header_link.is_empty() {
            link_target = Some(Cow::Borrowed(header_link));
        }
    }

    let raw_pax = slices.pax_extensions;
    let mut pax_sparse_map: Option<Vec<SparseEntry>> = None;
    let mut pax_sparse_real_size: Option<u64> = None;
    let mut pax_sparse_name: Option<&'a [u8]> = None;
    // Offset record waiting for its matching numbytes record.
    let mut pax_sparse_pending_offset: Option<u64> = None;

    if let Some(pax) = raw_pax {
        let ignore_errors = self.ignore_pax_errors;
        let extensions = PaxExtensions::new(pax);

        // Parses a decimal u64 record value; `Ok(None)` means "skip this record"
        // (only possible when PAX errors are ignored).
        let parse_pax_u64 =
            |ext: &crate::PaxExtension<'_>, key: &'static str| -> Result<Option<u64>> {
                let s = match ext.value() {
                    Ok(s) => s,
                    Err(_) if ignore_errors => return Ok(None),
                    Err(_) => {
                        return Err(ParseError::InvalidPaxValue {
                            key,
                            value: Cow::Borrowed("<non-UTF-8>"),
                        })
                    }
                };
                match s.parse::<u64>() {
                    Ok(v) => Ok(Some(v)),
                    Err(_) if ignore_errors => Ok(None),
                    Err(_) => Err(ParseError::InvalidPaxValue {
                        key,
                        value: s.to_owned().into(),
                    }),
                }
            };

        for ext in extensions {
            let ext = ext?;
            // Honour `ignore_pax_errors` for undecodable keys, exactly like the
            // sparse-version and sparse-1.0 scans that read the same records
            // before this function runs.
            let key = match ext.key() {
                Ok(k) => k,
                Err(_) if ignore_errors => continue,
                Err(e) => return Err(ParseError::from(e)),
            };
            let value = ext.value_bytes();
            match key {
                PAX_PATH => {
                    self.limits.check_path_len(value.len())?;
                    path = Cow::Borrowed(value);
                }
                PAX_LINKPATH => {
                    self.limits.check_path_len(value.len())?;
                    link_target = Some(Cow::Borrowed(value));
                }
                PAX_SIZE => {
                    if let Some(v) = parse_pax_u64(&ext, PAX_SIZE)? {
                        entry_size = v;
                    }
                }
                PAX_UID => {
                    if let Some(v) = parse_pax_u64(&ext, PAX_UID)? {
                        uid = v;
                    }
                }
                PAX_GID => {
                    if let Some(v) = parse_pax_u64(&ext, PAX_GID)? {
                        gid = v;
                    }
                }
                PAX_MTIME => {
                    let s = match ext.value() {
                        Ok(s) => s,
                        Err(_) if ignore_errors => continue,
                        Err(_) => {
                            return Err(ParseError::InvalidPaxValue {
                                key: PAX_MTIME,
                                value: Cow::Borrowed("<non-UTF-8>"),
                            })
                        }
                    };
                    // PAX times may carry a fractional part; keep whole seconds.
                    let int_part = s.split('.').next().unwrap_or(s);
                    match int_part.parse::<u64>() {
                        Ok(v) => mtime = v,
                        Err(_) if ignore_errors => {}
                        Err(_) => {
                            return Err(ParseError::InvalidPaxValue {
                                key: PAX_MTIME,
                                value: s.to_owned().into(),
                            })
                        }
                    }
                }
                PAX_UNAME => {
                    uname = Some(Cow::Borrowed(value));
                }
                PAX_GNAME => {
                    gname = Some(Cow::Borrowed(value));
                }
                PAX_GNU_SPARSE_OFFSET => {
                    let v = parse_pax_u64(&ext, PAX_GNU_SPARSE_OFFSET)?;
                    pax_sparse_pending_offset = v;
                }
                PAX_GNU_SPARSE_NUMBYTES => {
                    // Pairs with the most recent offset record; an unpaired
                    // numbytes record is dropped.
                    if let (Some(offset), Some(length)) = (
                        pax_sparse_pending_offset.take(),
                        parse_pax_u64(&ext, PAX_GNU_SPARSE_NUMBYTES)?,
                    ) {
                        let map = pax_sparse_map.get_or_insert_with(Vec::new);
                        if map.len() >= self.limits.max_sparse_entries {
                            return Err(ParseError::TooManySparseEntries {
                                count: map.len() + 1,
                                limit: self.limits.max_sparse_entries,
                            });
                        }
                        map.push(SparseEntry { offset, length });
                    }
                }
                PAX_GNU_SPARSE_MAP => {
                    let s = match ext.value() {
                        Ok(s) => s,
                        Err(_) if ignore_errors => continue,
                        Err(_) => {
                            return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
                                "non-UTF8 sparse map",
                            )))
                        }
                    };
                    // Comma-separated list: offset,length,offset,length,...
                    let mut map = Vec::new();
                    let parts: Vec<&str> = s.split(',').filter(|p| !p.is_empty()).collect();
                    if parts.len() % 2 != 0 {
                        return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
                            "odd number of values in GNU.sparse.map",
                        )));
                    }
                    for pair in parts.chunks(2) {
                        if map.len() >= self.limits.max_sparse_entries {
                            return Err(ParseError::TooManySparseEntries {
                                count: map.len() + 1,
                                limit: self.limits.max_sparse_entries,
                            });
                        }
                        let offset = pair[0].parse::<u64>().map_err(|_| {
                            ParseError::InvalidPaxSparseMap(
                                format!("invalid offset: {:?}", pair[0]).into(),
                            )
                        })?;
                        let length = pair[1].parse::<u64>().map_err(|_| {
                            ParseError::InvalidPaxSparseMap(
                                format!("invalid length: {:?}", pair[1]).into(),
                            )
                        })?;
                        map.push(SparseEntry { offset, length });
                    }
                    pax_sparse_map = Some(map);
                }
                PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
                    if let Some(v) = parse_pax_u64(&ext, PAX_GNU_SPARSE_REALSIZE)? {
                        pax_sparse_real_size = Some(v);
                    }
                }
                PAX_GNU_SPARSE_NAME => {
                    self.limits.check_path_len(value.len())?;
                    pax_sparse_name = Some(value);
                }
                // Version records are interpreted before this function is called.
                PAX_GNU_SPARSE_MAJOR | PAX_GNU_SPARSE_MINOR => {}
                _ => {
                    if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
                        xattrs
                            .push((Cow::Borrowed(attr_name.as_bytes()), Cow::Borrowed(value)));
                    }
                }
            }
        }
    }

    if let Some(name) = pax_sparse_name {
        path = Cow::Borrowed(name);
    }

    // Normalise empty overrides to "absent".
    if link_target.as_ref().is_some_and(|v| v.is_empty()) {
        link_target = None;
    }
    if uname.as_ref().is_some_and(|v| v.is_empty()) {
        uname = None;
    }
    if gname.as_ref().is_some_and(|v| v.is_empty()) {
        gname = None;
    }

    if path.is_empty() && !self.allow_empty_path {
        return Err(ParseError::EmptyPath);
    }
    self.limits.check_path_len(path.len())?;

    let entry = ParsedEntry {
        header,
        entry_type: header.entry_type(),
        path,
        link_target,
        mode: header.mode()?,
        uid,
        gid,
        mtime,
        size: entry_size,
        uname,
        gname,
        dev_major: header.device_major()?,
        dev_minor: header.device_minor()?,
        xattrs,
        pax: raw_pax,
    };

    // A caller-supplied sparse context wins; otherwise fall back to a map found
    // in the PAX records, which consumes no extra blocks.
    let sparse = sparse.or_else(|| {
        pax_sparse_map.map(|map| SparseContext {
            sparse_map: map,
            real_size: pax_sparse_real_size.unwrap_or(entry_size),
            ext_consumed: 0,
        })
    });

    if let Some(ctx) = sparse {
        Ok(ParseEvent::SparseEntry {
            consumed: HEADER_SIZE + ctx.ext_consumed,
            entry,
            sparse_map: ctx.sparse_map,
            real_size: ctx.real_size,
        })
    } else {
        Ok(ParseEvent::Entry {
            consumed: HEADER_SIZE,
            entry,
        })
    }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{GNU_MAGIC, GNU_VERSION, USTAR_MAGIC, USTAR_VERSION};
/// The default limits have the expected metadata, path and pending-entry values.
#[test]
fn test_default_limits() {
    let Limits {
        max_metadata_size,
        max_path_len,
        max_pending_entries,
        ..
    } = Limits::default();
    assert_eq!(max_metadata_size, 1024 * 1024);
    assert_eq!(max_path_len, None);
    assert_eq!(max_pending_entries, 16);
}
/// The permissive limits lift the metadata cap and leave path length unchecked.
#[test]
fn test_permissive_limits() {
    let Limits {
        max_metadata_size,
        max_path_len,
        ..
    } = Limits::permissive();
    assert_eq!(max_metadata_size, u32::MAX);
    assert_eq!(max_path_len, None);
}
/// The permissive limits are strictly looser than the defaults.
#[test]
fn test_permissive_limits_relaxed() {
    let permissive = Limits::permissive();
    let baseline_metadata = Limits::default().max_metadata_size;
    let baseline_pending = Limits::default().max_pending_entries;
    assert!(permissive.max_metadata_size > baseline_metadata);
    assert!(permissive.max_pending_entries > baseline_pending);
}
/// Two zero blocks are recognised as the end of the archive.
#[test]
fn test_parser_empty_archive() {
    let end_marker = [0u8; 1024];
    let mut parser = Parser::new(Limits::default());
    let outcome = parser.parse(&end_marker).unwrap();
    assert!(matches!(outcome, ParseEvent::End { consumed: 1024 }));
    assert!(parser.is_done());
}
/// Less than one block of input asks for a full header block.
#[test]
fn test_parser_need_data() {
    let partial_block = [0u8; 256];
    let mut parser = Parser::new(Limits::default());
    let outcome = parser.parse(&partial_block).unwrap();
    assert!(matches!(outcome, ParseEvent::NeedData { min_bytes: 512 }));
}
/// A single zero block is not enough to decide on end-of-archive; a second block
/// is requested.
#[test]
fn test_parser_need_more_for_end() {
    let one_zero_block = [0u8; 512];
    let mut parser = Parser::new(Limits::default());
    let outcome = parser.parse(&one_zero_block).unwrap();
    assert!(matches!(outcome, ParseEvent::NeedData { min_bytes: 1024 }));
}
/// Parses a hand-built ustar header for an empty regular file, followed by the
/// end-of-archive marker.
#[test]
fn test_parser_with_real_header() {
let mut parser = Parser::new(Limits::default());
let mut data = vec![0u8; 2048];
// Fill in the header fields by byte offset: name, four octal numeric fields,
// the size (zero), one more numeric field, the type flag, then magic and version.
data[0..8].copy_from_slice(b"test.txt");
data[100..107].copy_from_slice(b"0000644");
data[108..115].copy_from_slice(b"0000000");
data[116..123].copy_from_slice(b"0000000");
data[124..135].copy_from_slice(b"00000000000");
data[136..147].copy_from_slice(b"00000000000");
data[156] = b'0';
data[257..263].copy_from_slice(USTAR_MAGIC);
data[263..265].copy_from_slice(USTAR_VERSION);
// The checksum must be computed last, after every other field is in place.
let header = Header::from_bytes((&data[..512]).try_into().unwrap());
let checksum = header.compute_checksum();
let checksum_str = format!("{checksum:06o}\0 ");
data[148..156].copy_from_slice(checksum_str.as_bytes());
let event = parser.parse(&data).unwrap();
match event {
ParseEvent::Entry { consumed, entry } => {
assert_eq!(consumed, 512);
assert_eq!(entry.path_lossy(), "test.txt");
assert_eq!(entry.size, 0);
assert!(entry.is_file());
}
other => panic!("Expected Entry, got {:?}", other),
}
// The entry has no content, so the end marker follows the header directly.
let event = parser.parse(&data[512..]).unwrap();
assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
}
/// Parses a header for a 5-byte file and checks that the caller is expected to
/// skip the padded content itself before parsing the next header.
#[test]
fn test_parser_entry_with_content() {
let mut parser = Parser::new(Limits::default());
let mut data = vec![0u8; 2560];
data[0..8].copy_from_slice(b"test.txt");
data[100..107].copy_from_slice(b"0000644");
data[108..115].copy_from_slice(b"0000000");
data[116..123].copy_from_slice(b"0000000");
// Size field: octal 5.
data[124..135].copy_from_slice(b"00000000005"); data[136..147].copy_from_slice(b"00000000000");
data[156] = b'0';
data[257..263].copy_from_slice(USTAR_MAGIC);
data[263..265].copy_from_slice(USTAR_VERSION);
// The checksum must be computed last, after every other field is in place.
let header = Header::from_bytes((&data[..512]).try_into().unwrap());
let checksum = header.compute_checksum();
let checksum_str = format!("{checksum:06o}\0 ");
data[148..156].copy_from_slice(checksum_str.as_bytes());
data[512..517].copy_from_slice(b"hello");
let event = parser.parse(&data).unwrap();
match event {
ParseEvent::Entry { consumed, entry } => {
// Only the header block is consumed; the content is left to the caller.
assert_eq!(consumed, 512);
assert_eq!(entry.path_lossy(), "test.txt");
assert_eq!(entry.size, 5);
assert_eq!(entry.padded_size(), 512);
}
other => panic!("Expected Entry, got {:?}", other),
}
// Skip header (512) + padded content (512) before parsing the end marker.
let event = parser.parse(&data[1024..]).unwrap();
assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
}
/// Builds a checksummed ustar header block for tests.
///
/// `name` is truncated to 100 bytes, `size` is written as 11 octal digits and
/// `typeflag` is stored at offset 156. The remaining numeric fields use fixed
/// values.
fn make_header(name: &[u8], size: u64, typeflag: u8) -> [u8; HEADER_SIZE] {
let mut header = [0u8; HEADER_SIZE];
let name_len = name.len().min(100);
header[0..name_len].copy_from_slice(&name[..name_len]);
header[100..107].copy_from_slice(b"0000644");
header[108..115].copy_from_slice(b"0001750");
header[116..123].copy_from_slice(b"0001750");
let size_str = format!("{size:011o}");
header[124..135].copy_from_slice(size_str.as_bytes());
header[136..147].copy_from_slice(b"14712345670");
header[156] = typeflag;
header[257..263].copy_from_slice(USTAR_MAGIC);
header[263..265].copy_from_slice(USTAR_VERSION);
// Compute the checksum only after all other fields have been written.
let hdr = Header::from_bytes(&header);
let checksum = hdr.compute_checksum();
let checksum_str = format!("{checksum:06o}\0 ");
header[148..156].copy_from_slice(checksum_str.as_bytes());
header
}
/// Builds a zero-size header with a link target (truncated to 100 bytes) written
/// at offset 157.
fn make_link_header(name: &[u8], link_target: &[u8], typeflag: u8) -> [u8; HEADER_SIZE] {
let mut header = make_header(name, 0, typeflag);
let link_len = link_target.len().min(100);
header[157..157 + link_len].copy_from_slice(&link_target[..link_len]);
// The link field changed the block, so the checksum has to be recomputed.
let hdr = Header::from_bytes(&header);
let checksum = hdr.compute_checksum();
let checksum_str = format!("{checksum:06o}\0 ");
header[148..156].copy_from_slice(checksum_str.as_bytes());
header
}
/// Builds a GNU long-name extension (type `L`): a header followed by the
/// NUL-terminated name, zero-padded to a whole number of blocks.
fn make_gnu_long_name(name: &[u8]) -> Vec<u8> {
    // Content is the name plus its terminating NUL.
    let content_size = name.len() + 1;
    let total_len = HEADER_SIZE + content_size.next_multiple_of(HEADER_SIZE);
    let mut block = make_header(b"././@LongLink", content_size as u64, b'L').to_vec();
    block.extend_from_slice(name);
    // Zero fill supplies both the NUL terminator and the block padding.
    block.resize(total_len, 0);
    block
}
/// Builds a GNU long-link extension (type `K`): a header followed by the
/// NUL-terminated link target, zero-padded to a whole number of blocks.
fn make_gnu_long_link(link: &[u8]) -> Vec<u8> {
    // Content is the link target plus its terminating NUL.
    let content_size = link.len() + 1;
    let total_len = HEADER_SIZE + content_size.next_multiple_of(HEADER_SIZE);
    let mut block = make_header(b"././@LongLink", content_size as u64, b'K').to_vec();
    block.extend_from_slice(link);
    // Zero fill supplies both the NUL terminator and the block padding.
    block.resize(total_len, 0);
    block
}
/// Builds a PAX header entry of the given type whose content is the supplied
/// key/value pairs encoded as `"<len> <key>=<value>\n"` records, zero-padded to a
/// whole number of blocks.
fn make_pax_entry(name: &[u8], type_flag: u8, entries: &[(&str, &[u8])]) -> Vec<u8> {
use crate::builder::DecU64;
let mut content = Vec::new();
for (key, value) in entries {
// 3 = the space, the '=' and the trailing newline.
let rest_len = 3 + key.len() + value.len();
// The length prefix counts its own digits, so find the smallest digit count
// for which the total still fits in that many digits.
let mut len_len = 1;
let mut max_len = 10;
while rest_len + len_len >= max_len {
len_len += 1;
max_len *= 10;
}
let total_len = rest_len + len_len;
let len_dec = DecU64::new(total_len as u64);
content.extend_from_slice(len_dec.as_bytes());
content.push(b' ');
content.extend_from_slice(key.as_bytes());
content.push(b'=');
content.extend_from_slice(value);
content.push(b'\n');
}
let content_size = content.len();
let header = make_header(name, content_size as u64, type_flag);
let padded = content_size.next_multiple_of(HEADER_SIZE);
let mut result = Vec::with_capacity(HEADER_SIZE + padded);
result.extend_from_slice(&header);
result.extend_from_slice(&content);
result.extend(zeroes(padded - content_size));
result
}
/// Builds a per-entry PAX extended header (type `x`) with the given records.
fn make_pax_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
make_pax_entry(b"PaxHeader/file", b'x', entries)
}
/// Builds a global PAX extended header (type `g`) with the given records.
fn make_pax_global_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
make_pax_entry(b"pax_global_header", b'g', entries)
}
/// Returns an iterator that yields exactly `n` zero bytes.
fn zeroes(n: usize) -> impl Iterator<Item = u8> {
    std::iter::repeat(0u8).take(n)
}
/// A GNU long-name extension overrides the path of the entry that follows it,
/// and its blocks are included in the entry's `consumed` count.
#[test]
fn test_parser_gnu_long_name() {
let long_name =
"very/long/path/that/exceeds/one/hundred/bytes/".to_string() + &"x".repeat(60);
assert!(long_name.len() > 100);
// Archive layout: long-name extension, entry header, one content block, end marker.
let mut archive = Vec::new();
archive.extend(make_gnu_long_name(long_name.as_bytes()));
archive.extend_from_slice(&make_header(b"placeholder", 5, b'0'));
let mut content_block = [0u8; 512];
content_block[0..5].copy_from_slice(b"hello");
archive.extend_from_slice(&content_block);
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
let consumed = match &event {
ParseEvent::Entry { consumed, entry } => {
assert!(*consumed > 512);
assert_eq!(entry.path.as_ref(), long_name.as_bytes());
assert_eq!(entry.size, 5);
assert!(entry.is_file());
*consumed
}
other => panic!("Expected Entry, got {:?}", other),
};
// Skip the entry's single padded content block before parsing the end marker.
let remaining = &archive[consumed + 512..];
let event = parser.parse(remaining).unwrap();
assert!(matches!(event, ParseEvent::End { .. }));
}
/// A GNU long-link extension overrides the link target stored in the header of
/// the symlink entry that follows it.
#[test]
fn test_parser_gnu_long_link() {
let long_target = "/some/very/long/symlink/target/path/".to_string() + &"t".repeat(80);
assert!(long_target.len() > 100);
// Archive layout: long-link extension, symlink header, end marker.
let mut archive = Vec::new();
archive.extend(make_gnu_long_link(long_target.as_bytes()));
archive.extend_from_slice(&make_link_header(b"mylink", b"placeholder", b'2'));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
let consumed = match &event {
ParseEvent::Entry { consumed, entry } => {
assert_eq!(entry.path.as_ref(), b"mylink");
assert!(entry.is_symlink());
assert_eq!(
entry.link_target.as_ref().unwrap().as_ref(),
long_target.as_bytes()
);
*consumed
}
other => panic!("Expected Entry, got {:?}", other),
};
// The symlink has no content, so the end marker follows immediately.
let remaining = &archive[consumed..];
let event = parser.parse(remaining).unwrap();
assert!(matches!(event, ParseEvent::End { .. }));
}
/// A PAX `path` record must override the name stored in the file header.
#[test]
fn test_parser_pax_path_override() {
    let pax_path = "pax/overridden/path/to/file.txt";

    let mut tar = make_pax_header(&[("path", pax_path.as_bytes())]);
    tar.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A PAX `size` record (999) must win over the size in the file header (5).
#[test]
fn test_parser_pax_size_override() {
    let mut tar = make_pax_header(&[("size", b"999")]);
    tar.extend_from_slice(&make_header(b"file.txt", 5, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.size, 999);
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// PAX `uid`, `gid` and `mtime` records must be reflected in the entry; the
/// fractional part of `mtime` is not visible in `entry.mtime`.
#[test]
fn test_parser_pax_metadata() {
    let mut tar = make_pax_header(&[
        ("uid", b"65534"),
        ("gid", b"65535"),
        ("mtime", b"1700000000.123456789"),
    ]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.uid, 65534);
            assert_eq!(entry.gid, 65535);
            assert_eq!(entry.mtime, 1700000000);
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// `SCHILY.xattr.*` PAX records must surface as extended attributes, in order,
/// with the `SCHILY.xattr.` prefix stripped from the key.
#[test]
fn test_parser_pax_xattr() {
    let mut tar = make_pax_header(&[
        ("SCHILY.xattr.user.test", b"test_value"),
        (
            "SCHILY.xattr.security.selinux",
            b"system_u:object_r:unlabeled_t:s0",
        ),
    ]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.xattrs.len(), 2);

            assert_eq!(entry.xattrs[0].0.as_ref(), b"user.test");
            assert_eq!(entry.xattrs[0].1.as_ref(), b"test_value");

            assert_eq!(entry.xattrs[1].0.as_ref(), b"security.selinux");
            assert_eq!(
                entry.xattrs[1].1.as_ref(),
                b"system_u:object_r:unlabeled_t:s0"
            );
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// The raw PAX payload exposed through `entry.pax` must still contain every
/// record (including unknown keys) in its original order.
#[test]
fn test_parser_pax_raw_bytes_preserved() {
    let mut tar = make_pax_header(&[
        ("path", b"custom/path.txt"),
        ("SCHILY.xattr.user.key", b"val"),
        ("myfancykey", b"myfancyvalue"),
    ]);
    tar.extend_from_slice(&make_header(b"orig.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), b"custom/path.txt");
            assert_eq!(entry.xattrs.len(), 1);

            // Re-parse the raw payload and collect the keys that decode cleanly.
            let raw = entry.pax.expect("pax should be Some");
            let keys: Vec<&str> = PaxExtensions::new(raw)
                .filter_map(|ext| ext.ok())
                .filter_map(|ext| ext.key().ok())
                .collect();
            assert_eq!(keys, &["path", "SCHILY.xattr.user.key", "myfancykey"]);
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// An entry that is not preceded by a PAX header must report `pax` as `None`.
#[test]
fn test_parser_no_pax_means_none() {
    let mut tar = Vec::new();
    tar.extend_from_slice(&make_header(b"plain.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => assert!(entry.pax.is_none()),
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A GNU long name overrides the path but must not make `entry.pax` appear.
#[test]
fn test_parser_gnu_long_name_no_pax() {
    let mut long_name = String::from("long/path/");
    long_name.push_str(&"x".repeat(100));

    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(long_name.as_bytes()));
    tar.extend_from_slice(&make_header(b"short", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), long_name.as_bytes());
            assert!(entry.pax.is_none());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A PAX `linkpath` record must override the link target in the symlink header.
#[test]
fn test_parser_pax_linkpath() {
    let pax_linkpath = "/a/very/long/symlink/target/from/pax";

    let mut tar = make_pax_header(&[("linkpath", pax_linkpath.as_bytes())]);
    tar.extend_from_slice(&make_link_header(b"mylink", b"short", b'2'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert!(entry.is_symlink());
            assert_eq!(
                entry.link_target.as_ref().unwrap().as_ref(),
                pax_linkpath.as_bytes()
            );
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A global PAX header is reported as its own `GlobalExtensions` event with
/// the raw records; the entry that follows carries no `pax` payload.
#[test]
fn test_parser_global_pax_header() {
    let mut tar = make_pax_global_header(&[
        ("mtime", b"1700000000"),
        (
            "SCHILY.xattr.security.selinux",
            b"system_u:object_r:default_t:s0",
        ),
    ]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    // First event: the global extensions themselves.
    let global = parser.parse(&tar).unwrap();
    let consumed = match &global {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let keys: Vec<&str> = PaxExtensions::new(pax_data)
                .filter_map(|ext| ext.ok())
                .filter_map(|ext| ext.key().ok())
                .collect();
            assert_eq!(keys, &["mtime", "SCHILY.xattr.security.selinux"]);
            *consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    // Second event: the regular file entry.
    match parser.parse(&tar[consumed..]).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path_lossy(), "file.txt");
            assert!(entry.pax.is_none());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// Given only a global PAX header announcing 100 bytes of content, the parser
/// must ask for 1024 bytes.
#[test]
fn test_parser_global_pax_header_need_data() {
    let header_only = make_header(b"pax_global_header", 100, b'g');

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(min_bytes, 1024),
        other => panic!("Expected NeedData, got {:?}", other),
    }
}
/// A global PAX header with a 1000-byte value must be rejected when
/// `max_metadata_size` is 100.
#[test]
fn test_parser_global_pax_header_too_large() {
    let large_value = "x".repeat(1000);

    let mut tar = make_pax_global_header(&[("comment", large_value.as_bytes())]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Default::default()
    });
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
/// Two consecutive global PAX headers must be reported as two separate
/// `GlobalExtensions` events, in archive order, before the file entry.
#[test]
fn test_parser_multiple_global_pax_headers() {
    let mut tar = make_pax_global_header(&[("comment", b"first")]);
    tar.extend(make_pax_global_header(&[("comment", b"second")]));
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let first = parser.parse(&tar).unwrap();
    let consumed1 = match &first {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let records: Vec<_> = PaxExtensions::new(pax_data)
                .filter_map(|ext| ext.ok())
                .collect();
            assert_eq!(records[0].value_bytes(), b"first");
            *consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    let second = parser.parse(&tar[consumed1..]).unwrap();
    let consumed2 = match &second {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let records: Vec<_> = PaxExtensions::new(pax_data)
                .filter_map(|ext| ext.ok())
                .collect();
            assert_eq!(records[0].value_bytes(), b"second");
            *consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    let third = parser.parse(&tar[consumed1 + consumed2..]).unwrap();
    assert!(matches!(third, ParseEvent::Entry { .. }));
}
/// A global PAX header followed by a local one: the local `path` override
/// must still apply and the entry must expose its own `pax` payload.
#[test]
fn test_parser_global_pax_does_not_interfere_with_local_pax() {
    let mut tar = make_pax_global_header(&[("mtime", b"1000000000")]);
    tar.extend(make_pax_header(&[("path", b"overridden.txt")]));
    tar.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let consumed = match parser.parse(&tar).unwrap() {
        ParseEvent::GlobalExtensions { consumed, .. } => consumed,
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    match parser.parse(&tar[consumed..]).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), b"overridden.txt");
            assert!(entry.pax.is_some());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A GNU long-name entry followed directly by the end of the archive must be
/// reported as orphaned metadata.
#[test]
fn test_parser_orphaned_metadata() {
    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(b"some/long/name/here"));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::OrphanedMetadata)
    ));
}
/// A local PAX header followed directly by the end of the archive must be
/// reported as orphaned metadata.
#[test]
fn test_parser_orphaned_pax_metadata() {
    let mut tar = make_pax_header(&[("path", b"test")]);
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::OrphanedMetadata)
    ));
}
/// Two GNU long-name entries in front of one file must be rejected.
#[test]
fn test_parser_duplicate_gnu_long_name() {
    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(b"first/long/name"));
    tar.extend(make_gnu_long_name(b"second/long/name"));
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::DuplicateGnuLongName)
    ));
}
/// Two GNU long-link entries in front of one symlink must be rejected.
#[test]
fn test_parser_duplicate_gnu_long_link() {
    let mut tar = Vec::new();
    tar.extend(make_gnu_long_link(b"first/long/target"));
    tar.extend(make_gnu_long_link(b"second/long/target"));
    tar.extend_from_slice(&make_link_header(b"link", b"x", b'2'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::DuplicateGnuLongLink)
    ));
}
/// Two local PAX headers in front of one file must be rejected.
#[test]
fn test_parser_duplicate_pax_header() {
    let mut tar = make_pax_header(&[("path", b"first")]);
    tar.extend(make_pax_header(&[("path", b"second")]));
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::DuplicatePaxHeader)
    ));
}
/// When both a GNU long name and a PAX `path` precede an entry, the PAX path
/// is the one the entry reports.
#[test]
fn test_parser_combined_gnu_pax() {
    let mut gnu_name = String::from("gnu/long/name/");
    gnu_name.push_str(&"g".repeat(100));
    let pax_path = "pax/should/win/file.txt";

    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(gnu_name.as_bytes()));
    tar.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
    tar.extend_from_slice(&make_header(b"header.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A GNU long name and a GNU long link may both precede the same symlink;
/// each must override its respective field.
#[test]
fn test_parser_gnu_long_name_and_link_combined() {
    let mut long_name = String::from("long/symlink/name/");
    long_name.push_str(&"n".repeat(100));
    let mut long_target = String::from("long/target/path/");
    long_target.push_str(&"t".repeat(100));

    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(long_name.as_bytes()));
    tar.extend(make_gnu_long_link(long_target.as_bytes()));
    tar.extend_from_slice(&make_link_header(b"short", b"short", b'2'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.path.as_ref(), long_name.as_bytes());
            assert_eq!(
                entry.link_target.as_ref().unwrap().as_ref(),
                long_target.as_bytes()
            );
            assert!(entry.is_symlink());
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// Two files, each with its own PAX `path` override: every override applies
/// only to the entry that immediately follows it.
#[test]
fn test_parser_pax_multiple_entries() {
    // One 512-byte content block per file.
    let mut hello_block = [0u8; 512];
    hello_block[..5].copy_from_slice(b"hello");
    let mut world_block = [0u8; 512];
    world_block[..5].copy_from_slice(b"world");

    let mut tar = make_pax_header(&[("path", b"first/file.txt")]);
    tar.extend_from_slice(&make_header(b"f1", 5, b'0'));
    tar.extend_from_slice(&hello_block);
    tar.extend(make_pax_header(&[("path", b"second/file.txt")]));
    tar.extend_from_slice(&make_header(b"f2", 5, b'0'));
    tar.extend_from_slice(&world_block);
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let consumed1 = match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(entry.path.as_ref(), b"first/file.txt");
            assert_eq!(entry.size, 5);
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Step over the first file's content block.
    let second_start = consumed1 + 512;
    let consumed2 = match parser.parse(&tar[second_start..]).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(entry.path.as_ref(), b"second/file.txt");
            assert_eq!(entry.size, 5);
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Step over the second file's content block; the end marker is next.
    let end_start = second_start + consumed2 + 512;
    assert!(matches!(
        parser.parse(&tar[end_start..]).unwrap(),
        ParseEvent::End { .. }
    ));
}
/// PAX `uname` and `gname` records must be exposed on the entry.
#[test]
fn test_parser_pax_uname_gname() {
    let mut tar = make_pax_header(&[("uname", b"testuser"), ("gname", b"testgroup")]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.uname.as_ref().unwrap().as_ref(), b"testuser");
            assert_eq!(entry.gname.as_ref().unwrap().as_ref(), b"testgroup");
        }
        other => panic!("Expected Entry, got {:?}", other),
    }
}
/// A 200-byte GNU long name must be rejected when `max_metadata_size` is 100.
#[test]
fn test_parser_gnu_long_too_large() {
    let long_name = "x".repeat(200);

    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(long_name.as_bytes()));
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Default::default()
    });
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
/// A 200-byte PAX `path` must be rejected with the exact length and limit
/// when `max_path_len` is `Some(100)`.
#[test]
fn test_parser_pax_path_too_long() {
    let long_path = "x".repeat(200);

    let mut tar = make_pax_header(&[("path", long_path.as_bytes())]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_path_len: Some(100),
        ..Default::default()
    });
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::PathTooLong {
            len: 200,
            limit: 100
        })
    ));
}
/// A local PAX header with a 1000-byte value must be rejected when
/// `max_metadata_size` is 100.
#[test]
fn test_parser_pax_too_large() {
    let large_value = "x".repeat(1000);

    let mut tar = make_pax_header(&[("path", large_value.as_bytes())]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Default::default()
    });
    assert!(matches!(
        parser.parse(&tar),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
/// Given only a GNU long-name header announcing 200 bytes of content, the
/// parser must ask for 1024 bytes.
#[test]
fn test_parser_need_data_for_gnu_long_content() {
    let header_only = make_header(b"././@LongLink", 200, b'L');

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(min_bytes, 1024),
        other => panic!("Expected NeedData, got {:?}", other),
    }
}
/// Given only a local PAX header announcing 100 bytes of content, the parser
/// must ask for 1024 bytes.
#[test]
fn test_parser_need_data_for_pax_content() {
    let header_only = make_header(b"PaxHeader/file", 100, b'x');

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(min_bytes, 1024),
        other => panic!("Expected NeedData, got {:?}", other),
    }
}
/// When the input holds a complete 1024-byte GNU long-name entry but not the
/// header it applies to, `NeedData.min_bytes` must be measured from the start
/// of the input (1024 + 512).
#[test]
fn test_need_data_adjusted_through_extension_headers() {
    let mut long_name = String::from("long/path/name/");
    long_name.push_str(&"x".repeat(90));

    let gnu_entry = make_gnu_long_name(long_name.as_bytes());
    assert_eq!(gnu_entry.len(), 1024);

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&gnu_entry).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(
            min_bytes, 1536,
            "NeedData.min_bytes must account for bytes consumed by \
             extension headers (1024 + 512 = 1536)"
        ),
        other => panic!("Expected NeedData, got {:?}", other),
    }
}
/// Regression test for CVE-2025-62518: the header claims size 0 while PAX
/// claims 1024. The parser must use the PAX size, so that after skipping the
/// 1024 content bytes the next event is `End` rather than an entry parsed out
/// of the content.
#[test]
fn test_cve_2025_62518_pax_size_overrides_header() {
    // Content block with a marker string and a type-flag byte set at offset 156.
    let mut content = vec![0u8; 1024];
    content[..9].copy_from_slice(b"MALICIOUS");
    content[156] = b'0';

    let mut tar = make_pax_header(&[("size", b"1024")]);
    tar.extend_from_slice(&make_header(b"nested.tar", 0, b'0'));
    tar.extend_from_slice(&content);
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let consumed = match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(
                entry.size, 1024,
                "CVE-2025-62518: Parser MUST use PAX size (1024), not header size (0)"
            );
            assert_eq!(entry.padded_size(), 1024, "Padded size must match PAX size");
            assert_eq!(entry.path_lossy(), "nested.tar");
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Skip the content as a correct reader would, then expect the end marker.
    let after_content = &tar[consumed + 1024..];
    match parser.parse(after_content).unwrap() {
        ParseEvent::End { .. } => {}
        ParseEvent::Entry { entry, .. } => {
            panic!(
                "CVE-2025-62518 VULNERABLE: Parser found unexpected entry '{}' \
                 because it used header size (0) instead of PAX size (1024)",
                entry.path_lossy()
            );
        }
        other => panic!("Expected End, got {:?}", other),
    }
}
/// With a header size of 0 and a PAX `size` of 512, the reported entry size
/// must be 512.
#[test]
fn test_pax_size_affects_parser_state() {
    let mut tar = make_pax_header(&[("size", b"512")]);
    tar.extend_from_slice(&make_header(b"test.bin", 0, b'0'));
    tar.extend(zeroes(512)); // file content
    tar.extend(zeroes(1024)); // end-of-archive blocks

    let mut parser = Parser::new(Limits::default());
    let size = match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => entry.size,
        other => panic!("Expected Entry, got {:?}", other),
    };
    assert_eq!(size, 512, "Entry size must reflect PAX override");
}
/// Builds a minimal archive: one local PAX header holding the single record
/// `key=value`, an empty regular file `file.txt`, and 1024 trailing zero bytes.
fn make_archive_with_pax(key: &str, value: &[u8]) -> Vec<u8> {
    let mut tar = make_pax_header(&[(key, value)]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));
    tar
}
/// With default settings, a non-numeric PAX `uid` must be reported as
/// `InvalidPaxValue` for key `uid`.
#[test]
fn test_strict_rejects_invalid_pax_uid() {
    let tar = make_archive_with_pax("uid", b"notanumber");

    let err = Parser::new(Limits::default()).parse(&tar).unwrap_err();
    assert!(
        matches!(err, ParseError::InvalidPaxValue { key: "uid", .. }),
        "expected InvalidPaxValue for uid, got {err:?}"
    );
}
/// With default settings, a non-numeric PAX `size` must be reported as
/// `InvalidPaxValue` for key `size`.
#[test]
fn test_strict_rejects_invalid_pax_size() {
    let tar = make_archive_with_pax("size", b"xyz");

    let err = Parser::new(Limits::default()).parse(&tar).unwrap_err();
    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: "size", .. }
    ));
}
/// With default settings, a non-numeric PAX `gid` must be reported as
/// `InvalidPaxValue` for key `gid`.
#[test]
fn test_strict_rejects_invalid_pax_gid() {
    let tar = make_archive_with_pax("gid", b"bad");

    let err = Parser::new(Limits::default()).parse(&tar).unwrap_err();
    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: "gid", .. }
    ));
}
/// With default settings, a non-numeric PAX `mtime` must be reported as
/// `InvalidPaxValue` keyed by `PAX_MTIME`.
#[test]
fn test_strict_rejects_invalid_pax_mtime() {
    let tar = make_archive_with_pax("mtime", b"nottime");

    let err = Parser::new(Limits::default()).parse(&tar).unwrap_err();
    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: PAX_MTIME, .. }
    ));
}
/// With `set_ignore_pax_errors(true)`, an unparsable PAX `uid` is skipped and
/// the entry reports uid 1000 (presumably the value `make_header` writes).
#[test]
fn test_lenient_ignores_invalid_pax_uid() {
    let tar = make_archive_with_pax("uid", b"notanumber");

    let mut parser = Parser::new(Limits::default());
    parser.set_ignore_pax_errors(true);

    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => assert_eq!(entry.uid, 1000),
        other => panic!("Expected Entry, got {other:?}"),
    }
}
/// With `set_ignore_pax_errors(true)`, an unparsable PAX `size` is skipped and
/// the entry keeps the header size of 0.
#[test]
fn test_lenient_ignores_invalid_pax_size() {
    let tar = make_archive_with_pax("size", b"xyz");

    let mut parser = Parser::new(Limits::default());
    parser.set_ignore_pax_errors(true);

    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => assert_eq!(entry.size, 0),
        other => panic!("Expected Entry, got {other:?}"),
    }
}
/// Well-formed numeric PAX values must be accepted under default settings and
/// applied to the entry.
#[test]
fn test_strict_accepts_valid_pax_values() {
    let mut tar = make_pax_header(&[
        ("uid", b"2000"),
        ("gid", b"3000"),
        ("size", b"42"),
        ("mtime", b"1700000000"),
    ]);
    tar.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            assert_eq!(entry.uid, 2000);
            assert_eq!(entry.gid, 3000);
            assert_eq!(entry.size, 42);
            assert_eq!(entry.mtime, 1700000000);
        }
        other => panic!("Expected Entry, got {other:?}"),
    }
}
/// A PAX `mtime` with a fractional part is accepted under default settings;
/// `entry.mtime` reports the whole seconds.
#[test]
fn test_strict_accepts_fractional_mtime() {
    let tar = make_archive_with_pax("mtime", b"1234567890.123456");

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::Entry { entry, .. } => assert_eq!(entry.mtime, 1234567890),
        other => panic!("Expected Entry, got {other:?}"),
    }
}
/// Encodes `value` as a 12-byte numeric field: eleven zero-padded octal digits
/// followed by a NUL terminator.
///
/// # Panics
/// Panics if `value` needs more than eleven octal digits, because the
/// formatted text then no longer fits the 12-byte field.
fn encode_octal_12(value: u64) -> [u8; 12] {
    let text = format!("{value:011o}\0");
    let mut encoded = [0u8; 12];
    encoded.copy_from_slice(text.as_bytes());
    encoded
}
/// Builds a 512-byte GNU header with type flag `S` (old-style GNU sparse file).
///
/// * `name` – entry name; at most the first 100 bytes are stored.
/// * `entries` – up to four inline `(offset, length)` sparse descriptors,
///   written as 24-byte slots starting at byte 386.
/// * `on_disk_size` – value written to the size field at bytes 124..135.
/// * `real_size` – value written to the field at bytes 483..494.
/// * `is_extended` – sets the flag byte at offset 482.
///
/// The checksum field (bytes 148..156) is filled in last, from
/// `Header::compute_checksum` over the otherwise complete header.
///
/// # Panics
/// Panics if more than four descriptors are supplied.
fn make_gnu_sparse_header(
    name: &[u8],
    entries: &[(u64, u64)],
    on_disk_size: u64,
    real_size: u64,
    is_extended: bool,
) -> [u8; HEADER_SIZE] {
    assert!(entries.len() <= 4, "max 4 inline sparse descriptors");

    let mut header = [0u8; HEADER_SIZE];

    // Name, truncated to the 100-byte field.
    let stored = name.len().min(100);
    header[..stored].copy_from_slice(&name[..stored]);

    // Fixed octal fields at offsets 100, 108, 116 and 136.
    header[100..107].copy_from_slice(b"0000644");
    header[108..115].copy_from_slice(b"0001750");
    header[116..123].copy_from_slice(b"0001750");
    header[136..147].copy_from_slice(b"14712345670");

    // Size of the data actually stored in the archive.
    let on_disk_octal = format!("{on_disk_size:011o}");
    header[124..135].copy_from_slice(on_disk_octal.as_bytes());

    // Type flag and GNU magic/version.
    header[156] = b'S';
    header[257..263].copy_from_slice(GNU_MAGIC);
    header[263..265].copy_from_slice(GNU_VERSION);

    // Inline sparse descriptors: four 24-byte slots covering bytes 386..482.
    for (slot, &(offset, length)) in header[386..482].chunks_exact_mut(24).zip(entries) {
        slot[..12].copy_from_slice(&encode_octal_12(offset));
        slot[12..].copy_from_slice(&encode_octal_12(length));
    }

    header[482] = u8::from(is_extended);

    let real_octal = format!("{real_size:011o}");
    header[483..494].copy_from_slice(real_octal.as_bytes());

    // Checksum goes in last so it covers every field written above.
    let checksum = Header::from_bytes(&header).compute_checksum();
    let checksum_field = format!("{checksum:06o}\0 ");
    header[148..156].copy_from_slice(checksum_field.as_bytes());

    header
}
/// Builds a 512-byte GNU sparse extension block: up to 21 consecutive 24-byte
/// `(offset, length)` descriptors starting at byte 0, with the "more blocks
/// follow" flag at byte 504.
///
/// # Panics
/// Panics if more than 21 descriptors are supplied.
fn make_gnu_ext_sparse(entries: &[(u64, u64)], is_extended: bool) -> [u8; HEADER_SIZE] {
    assert!(entries.len() <= 21, "max 21 descriptors per ext block");

    let mut block = [0u8; HEADER_SIZE];
    // Descriptor slots occupy bytes 0..504; unused slots stay zeroed.
    for (slot, &(offset, length)) in block[..504].chunks_exact_mut(24).zip(entries) {
        slot[..12].copy_from_slice(&encode_octal_12(offset));
        slot[12..].copy_from_slice(&encode_octal_12(length));
    }
    block[504] = u8::from(is_extended);
    block
}
/// A GNU sparse header with two inline descriptors and no extension block
/// must yield a `SparseEntry` event exposing the map and the real size.
#[test]
fn test_sparse_basic() {
    let header = make_gnu_sparse_header(
        b"sparse.txt",
        &[(0x1000, 5), (0x3000, 5)],
        10,
        0x3005,
        false,
    );

    // The two 5-byte data regions are stored back to back.
    let mut content = [0u8; HEADER_SIZE];
    content[..10].copy_from_slice(b"helloworld");

    let mut tar = Vec::new();
    tar.extend_from_slice(&header);
    tar.extend_from_slice(&content);
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            consumed,
            entry,
            sparse_map,
            real_size,
        } => {
            assert_eq!(consumed, HEADER_SIZE);
            assert_eq!(entry.path_lossy(), "sparse.txt");
            assert_eq!(entry.size, 10);
            assert_eq!(real_size, 0x3005);
            assert_eq!(sparse_map.len(), 2);

            let expected = [
                SparseEntry {
                    offset: 0x1000,
                    length: 5,
                },
                SparseEntry {
                    offset: 0x3000,
                    length: 5,
                },
            ];
            for (idx, want) in expected.iter().enumerate() {
                assert_eq!(sparse_map[idx], *want);
            }
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// A sparse header with no descriptors must yield an empty sparse map while
/// still reporting the real size.
#[test]
fn test_sparse_no_entries() {
    let mut tar = Vec::new();
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"empty_sparse.txt",
        &[],
        0,
        4096,
        false,
    ));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            sparse_map,
            real_size,
            entry,
            ..
        } => {
            assert!(sparse_map.is_empty());
            assert_eq!(real_size, 4096);
            assert_eq!(entry.size, 0);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// All four inline descriptor slots are filled; every one must appear in the
/// parsed sparse map, in order.
#[test]
fn test_sparse_four_inline_entries() {
    let entries = [(0u64, 512), (1024, 512), (2048, 512), (3072, 512)];
    let on_disk: u64 = entries.iter().map(|(_, len)| len).sum();
    let real_size = 3072 + 512;

    let mut tar = Vec::new();
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"four.txt",
        &entries,
        on_disk,
        real_size,
        false,
    ));
    tar.extend(zeroes(on_disk.next_multiple_of(512) as usize));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            sparse_map,
            real_size: rs,
            ..
        } => {
            assert_eq!(sparse_map.len(), 4);
            assert_eq!(rs, real_size);
            for (idx, (off, len)) in entries.iter().copied().enumerate() {
                assert_eq!(sparse_map[idx].offset, off);
                assert_eq!(sparse_map[idx].length, len);
            }
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// Four inline descriptors plus one extension block with two more: the event
/// must consume both 512-byte blocks and report six map entries.
#[test]
fn test_sparse_with_extension_block() {
    let inline_entries = [(0u64, 100), (512, 100), (1024, 100), (1536, 100)];
    let ext_entries = [(2048u64, 100), (2560, 100)];
    let on_disk: u64 = 600;
    let real_size = 2660;

    let mut tar = Vec::new();
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"extended.txt",
        &inline_entries,
        on_disk,
        real_size,
        true,
    ));
    tar.extend_from_slice(&make_gnu_ext_sparse(&ext_entries, false));
    tar.extend(zeroes(on_disk.next_multiple_of(512) as usize));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            consumed,
            sparse_map,
            real_size: rs,
            ..
        } => {
            assert_eq!(consumed, 2 * HEADER_SIZE);
            assert_eq!(rs, real_size);
            assert_eq!(sparse_map.len(), 6);
            assert_eq!(sparse_map[4].offset, 2048);
            assert_eq!(sparse_map[5].offset, 2560);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// Four inline descriptors, a full extension block (21) and a second block
/// (3): the event must consume three 512-byte blocks and report 28 entries.
#[test]
fn test_sparse_multiple_extension_blocks() {
    let inline = [(0u64, 10), (100, 10), (200, 10), (300, 10)];
    let ext1_entries: Vec<(u64, u64)> = (0..21).map(|i| (400 + i * 100, 10)).collect();
    let ext2_entries = [(2500u64, 10), (2600, 10), (2700, 10)];
    let on_disk = 28 * 10u64;
    let real_size = 2710;

    let mut tar = Vec::new();
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"multi_ext.txt",
        &inline,
        on_disk,
        real_size,
        true,
    ));
    tar.extend_from_slice(&make_gnu_ext_sparse(&ext1_entries, true));
    tar.extend_from_slice(&make_gnu_ext_sparse(&ext2_entries, false));
    tar.extend(zeroes(on_disk.next_multiple_of(512) as usize));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            consumed,
            sparse_map,
            real_size: rs,
            ..
        } => {
            assert_eq!(consumed, 3 * HEADER_SIZE);
            assert_eq!(rs, real_size);
            assert_eq!(sparse_map.len(), 28);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// A sparse header with the extension flag set but no extension block in the
/// input must make the parser ask for two blocks' worth of data.
#[test]
fn test_sparse_need_data_for_extension() {
    let header_only = make_gnu_sparse_header(b"need_ext.txt", &[(0, 100)], 100, 100, true);

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(min_bytes, 2 * HEADER_SIZE),
        other => panic!("Expected NeedData, got {other:?}"),
    }
}
/// Header plus one extension block that itself announces another block: the
/// parser must ask for three blocks' worth of data.
#[test]
fn test_sparse_need_data_chained_extensions() {
    let mut input = Vec::new();
    input.extend_from_slice(&make_gnu_sparse_header(
        b"chain.txt",
        &[(0, 10)],
        20,
        20,
        true,
    ));
    input.extend_from_slice(&make_gnu_ext_sparse(&[(10, 10)], true));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&input).unwrap() {
        ParseEvent::NeedData { min_bytes } => assert_eq!(min_bytes, 3 * HEADER_SIZE),
        other => panic!("Expected NeedData, got {other:?}"),
    }
}
/// A type-`S` entry built by the generic `make_header` helper (rather than
/// the GNU sparse helper) must be rejected with `SparseNotGnu`.
#[test]
fn test_sparse_not_gnu_header() {
    let mut tar = Vec::new();
    tar.extend_from_slice(&make_header(b"bad_sparse.txt", 0, b'S'));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    assert!(matches!(
        parser.parse(&tar).unwrap_err(),
        ParseError::SparseNotGnu
    ));
}
/// Three inline descriptors plus one from an extension block exceed a
/// `max_sparse_entries` of 3 and must be rejected with the exact counts.
#[test]
fn test_sparse_too_many_entries() {
    let mut tar = Vec::new();
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"too_many.txt",
        &[(0, 10), (100, 10), (200, 10)],
        40,
        400,
        true,
    ));
    tar.extend_from_slice(&make_gnu_ext_sparse(&[(300, 10)], false));
    tar.extend(zeroes(512));
    tar.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_sparse_entries: 3,
        ..Default::default()
    });
    assert!(matches!(
        parser.parse(&tar).unwrap_err(),
        ParseError::TooManySparseEntries { count: 4, limit: 3 }
    ));
}
/// A GNU long name in front of a sparse header must override the sparse
/// entry's path without disturbing the sparse map or real size.
#[test]
fn test_sparse_with_gnu_long_name() {
    let mut long_name = String::from("a/");
    long_name.push_str(&"x".repeat(200));
    let on_disk = 512u64;
    let real_size = 8192u64;

    let mut tar = Vec::new();
    tar.extend(make_gnu_long_name(long_name.as_bytes()));
    tar.extend_from_slice(&make_gnu_sparse_header(
        b"placeholder",
        &[(0, 512)],
        on_disk,
        real_size,
        false,
    ));
    tar.extend(zeroes(on_disk as usize)); // stored data
    tar.extend(zeroes(1024)); // end-of-archive blocks

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&tar).unwrap() {
        ParseEvent::SparseEntry {
            entry,
            sparse_map,
            real_size: rs,
            ..
        } => {
            assert_eq!(entry.path.as_ref(), long_name.as_bytes());
            assert_eq!(rs, real_size);
            assert_eq!(sparse_map.len(), 1);
            assert_eq!(sparse_map[0].length, 512);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// After a `NeedData` reply for a truncated sparse entry, re-parsing the same
/// bytes with the missing extension block appended must succeed on the same
/// parser instance.
#[test]
fn test_sparse_need_data_is_side_effect_free() {
    let header = make_gnu_sparse_header(b"retry.txt", &[(0, 100)], 200, 300, true);
    let ext = make_gnu_ext_sparse(&[(100, 100)], false);

    let mut parser = Parser::new(Limits::default());

    // First attempt: only the header is available.
    assert!(matches!(
        parser.parse(&header).unwrap(),
        ParseEvent::NeedData { .. }
    ));

    // Second attempt: same header, now followed by the rest of the archive.
    let mut full = Vec::new();
    full.extend_from_slice(&header);
    full.extend_from_slice(&ext);
    full.extend(zeroes(512));
    full.extend(zeroes(1024));

    match parser.parse(&full).unwrap() {
        ParseEvent::SparseEntry {
            consumed,
            sparse_map,
            ..
        } => {
            assert_eq!(consumed, 2 * HEADER_SIZE);
            assert_eq!(sparse_map.len(), 2);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// A PAX header carrying a comma-separated `GNU.sparse.map` value must yield
/// a sparse entry with the listed regions, the `GNU.sparse.realsize` value as
/// its real size, and `GNU.sparse.name` as its path.
#[test]
fn test_pax_sparse_v01_map() {
    let mut archive = Vec::new();
    archive.extend(make_pax_header(&[
        ("GNU.sparse.map", b"0,100,200,100,400,50"),
        ("GNU.sparse.realsize", b"450"),
        ("GNU.sparse.name", b"real_name.txt"),
    ]));
    archive.extend_from_slice(&make_header(b"placeholder.txt", 250, b'0'));
    archive.extend(zeroes(512));
    archive.extend(zeroes(1024));

    // The map string decoded as consecutive (offset, length) pairs.
    let expected_regions = [
        SparseEntry {
            offset: 0,
            length: 100,
        },
        SparseEntry {
            offset: 200,
            length: 100,
        },
        SparseEntry {
            offset: 400,
            length: 50,
        },
    ];

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&archive).unwrap() {
        ParseEvent::SparseEntry {
            entry,
            sparse_map,
            real_size,
            ..
        } => {
            assert_eq!(entry.path.as_ref(), b"real_name.txt");
            assert_eq!(real_size, 450);
            assert_eq!(sparse_map.len(), 3);
            for (idx, region) in expected_regions.iter().enumerate() {
                assert_eq!(&sparse_map[idx], region);
            }
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// Repeated `GNU.sparse.offset` / `GNU.sparse.numbytes` records in a PAX
/// header must be paired up, in order, into the sparse map of the entry.
#[test]
fn test_pax_sparse_v00_pairs() {
    let mut archive = Vec::new();
    archive.extend(make_pax_header(&[
        ("GNU.sparse.offset", b"0"),
        ("GNU.sparse.numbytes", b"100"),
        ("GNU.sparse.offset", b"1024"),
        ("GNU.sparse.numbytes", b"200"),
        ("GNU.sparse.realsize", b"1224"),
        ("GNU.sparse.name", b"v00_sparse.dat"),
    ]));
    archive.extend_from_slice(&make_header(b"placeholder", 300, b'0'));
    archive.extend(zeroes(512));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let (entry, sparse_map, real_size) = match parser.parse(&archive).unwrap() {
        ParseEvent::SparseEntry {
            entry,
            sparse_map,
            real_size,
            ..
        } => (entry, sparse_map, real_size),
        other => panic!("Expected SparseEntry, got {other:?}"),
    };

    assert_eq!(entry.path.as_ref(), b"v00_sparse.dat");
    assert_eq!(real_size, 1224);
    assert_eq!(sparse_map.len(), 2);

    let first = SparseEntry {
        offset: 0,
        length: 100,
    };
    let second = SparseEntry {
        offset: 1024,
        length: 200,
    };
    assert_eq!(sparse_map[0], first);
    assert_eq!(sparse_map[1], second);
}
/// With `GNU.sparse.major=1` / `GNU.sparse.minor=0` the sparse map is stored
/// as newline-separated decimal text at the start of the entry's data. The
/// parser must decode it, report the remaining content size as `entry.size`,
/// and count the map block as consumed.
#[test]
fn test_pax_sparse_v10_data_prefix() {
    // Entry count, then offset/length for each of the two regions.
    let sparse_data = b"2\n0\n100\n1024\n200\n";
    let on_disk_content = 300u64;
    // The header size covers the 512-byte map block plus the real content.
    let total_size = 512 + on_disk_content;
    let padded_content = on_disk_content.next_multiple_of(512) as usize;

    let mut archive = Vec::new();
    archive.extend(make_pax_header(&[
        ("GNU.sparse.major", b"1"),
        ("GNU.sparse.minor", b"0"),
        ("GNU.sparse.realsize", b"2048"),
        ("GNU.sparse.name", b"v10_sparse.bin"),
    ]));
    archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));

    let mut data_block = [0u8; 512];
    data_block[..sparse_data.len()].copy_from_slice(sparse_data);
    archive.extend_from_slice(&data_block);
    archive.extend(zeroes(padded_content));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&archive).unwrap() {
        ParseEvent::SparseEntry {
            consumed,
            entry,
            sparse_map,
            real_size,
        } => {
            assert_eq!(entry.path.as_ref(), b"v10_sparse.bin");
            assert_eq!(real_size, 2048);
            assert_eq!(sparse_map.len(), 2);

            let expected_regions = [(0, 100), (1024, 200)];
            for (idx, &(offset, length)) in expected_regions.iter().enumerate() {
                assert_eq!(sparse_map[idx], SparseEntry { offset, length });
            }

            assert_eq!(entry.size, on_disk_content);

            // Whatever precedes the main header is the PAX extension; derive
            // its size by subtracting everything appended after it.
            let after_pax = HEADER_SIZE + 512 + padded_content + 1024;
            let pax_hdr_size = archive.len() - after_pax;
            // PAX extension + main header + the map block are consumed.
            let expected_consumed = pax_hdr_size + HEADER_SIZE + 512;
            assert_eq!(consumed, expected_consumed);
        }
        other => panic!("Expected SparseEntry, got {other:?}"),
    }
}
/// A version 1.0 PAX sparse entry whose header announces 512 bytes of data,
/// but whose buffer ends right after the header, must answer `NeedData`
/// because the sparse map lives in data that has not arrived yet.
#[test]
fn test_pax_sparse_v10_need_data() {
    let pax_records = make_pax_header(&[
        ("GNU.sparse.major", b"1"),
        ("GNU.sparse.minor", b"0"),
        ("GNU.sparse.realsize", b"100"),
        ("GNU.sparse.name", b"v10_need.txt"),
    ]);

    let mut archive = Vec::new();
    archive.extend(pax_records);
    // No data block follows the header on purpose.
    archive.extend_from_slice(&make_header(b"placeholder", 512, b'0'));

    let mut parser = Parser::new(Limits::default());
    let event = parser.parse(&archive).unwrap();
    let wants_more = matches!(event, ParseEvent::NeedData { .. });
    assert!(wants_more, "Expected NeedData, got {event:?}");
}
/// A `GNU.sparse.map` value with an odd number of fields cannot be split
/// into (offset, length) pairs and must be rejected as an invalid map.
#[test]
fn test_pax_sparse_v01_odd_map_values() {
    let pax_records = make_pax_header(&[
        // Three values: the second region has an offset but no length.
        ("GNU.sparse.map", b"0,100,200"),
        ("GNU.sparse.realsize", b"300"),
    ]);

    let mut archive = Vec::new();
    archive.extend(pax_records);
    archive.extend_from_slice(&make_header(b"file.txt", 100, b'0'));
    archive.extend(zeroes(512));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let err = parser.parse(&archive).unwrap_err();
    assert!(matches!(err, ParseError::InvalidPaxSparseMap(_)));
}
/// A version 1.0 data-prefix map that announces 1000 entries must be rejected
/// when the parser is limited to 100 sparse entries.
#[test]
fn test_pax_sparse_v10_too_many_entries() {
    // Only the entry count is present; no regions follow it.
    let sparse_data = b"1000\n";
    let total_size = 512u64;

    let mut data_block = [0u8; 512];
    data_block[..sparse_data.len()].copy_from_slice(sparse_data);

    let mut archive = Vec::new();
    archive.extend(make_pax_header(&[
        ("GNU.sparse.major", b"1"),
        ("GNU.sparse.minor", b"0"),
        ("GNU.sparse.realsize", b"100"),
        ("GNU.sparse.name", b"toomany.txt"),
    ]));
    archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
    archive.extend_from_slice(&data_block);
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_sparse_entries: 100,
        ..Default::default()
    });
    let err = parser.parse(&archive).unwrap_err();

    let over_limit = matches!(
        err,
        ParseError::TooManySparseEntries {
            count: 1000,
            limit: 100
        }
    );
    assert!(over_limit, "got: {err:?}");
}
/// Offset/numbytes records without any `GNU.sparse.major` / `minor` keys must
/// still be recognised as a sparse entry, as the test name says, in the 0.0
/// style.
#[test]
fn test_pax_sparse_without_version_is_v00() {
    let pax_records = make_pax_header(&[
        ("GNU.sparse.offset", b"0"),
        ("GNU.sparse.numbytes", b"50"),
        ("GNU.sparse.realsize", b"50"),
    ]);

    let mut archive = Vec::new();
    archive.extend(pax_records);
    archive.extend_from_slice(&make_header(b"noversion.txt", 50, b'0'));
    archive.extend(zeroes(512));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let (sparse_map, real_size) = match parser.parse(&archive).unwrap() {
        ParseEvent::SparseEntry {
            sparse_map,
            real_size,
            ..
        } => (sparse_map, real_size),
        other => panic!("Expected SparseEntry, got {other:?}"),
    };

    assert_eq!(sparse_map.len(), 1);
    let only_region = SparseEntry {
        offset: 0,
        length: 50,
    };
    assert_eq!(sparse_map[0], only_region);
    assert_eq!(real_size, 50);
}
mod sparse_proptests {
use super::*;
use proptest::prelude::*;
fn sparse_map_strategy(max_entries: usize) -> impl Strategy<Value = Vec<(u64, u64)>> {
proptest::collection::vec((0u64..0x10_000, 1u64..0x1000), 0..=max_entries).prop_map(
|raw| {
let mut entries: Vec<(u64, u64)> = Vec::new();
let mut cursor = 0u64;
for (gap, length) in raw {
let offset = cursor.saturating_add(gap);
entries.push((offset, length));
cursor = offset.saturating_add(length);
}
entries
},
)
}
proptest! {
#[test]
fn test_sparse_roundtrip_inline(
entries in sparse_map_strategy(4),
name_len in 1usize..50,
) {
let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
let header = make_gnu_sparse_header(
&name,
&entries,
on_disk,
real_size,
false,
);
let mut archive = Vec::new();
archive.extend_from_slice(&header);
archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
match event {
ParseEvent::SparseEntry {
consumed,
sparse_map,
real_size: rs,
entry,
..
} => {
prop_assert_eq!(consumed, HEADER_SIZE);
prop_assert_eq!(&entry.path[..], &name[..]);
prop_assert_eq!(rs, real_size);
prop_assert_eq!(sparse_map.len(), entries.len());
for (i, &(off, len)) in entries.iter().enumerate() {
prop_assert_eq!(sparse_map[i].offset, off);
prop_assert_eq!(sparse_map[i].length, len);
}
}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
#[test]
fn test_sparse_roundtrip_extended(
entries in sparse_map_strategy(25).prop_filter(
"need >4 entries for extension",
|e| e.len() > 4
),
) {
let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
let (inline, rest) = entries.split_at(4);
let header = make_gnu_sparse_header(
b"proptest_ext.bin",
inline,
on_disk,
real_size,
!rest.is_empty(),
);
let mut archive = Vec::new();
archive.extend_from_slice(&header);
let chunks: Vec<&[(u64, u64)]> = rest.chunks(21).collect();
for (i, chunk) in chunks.iter().enumerate() {
let is_last = i == chunks.len() - 1;
let ext = make_gnu_ext_sparse(chunk, !is_last);
archive.extend_from_slice(&ext);
}
archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
match event {
ParseEvent::SparseEntry {
consumed,
sparse_map,
real_size: rs,
..
} => {
let expected_blocks = 1 + chunks.len();
prop_assert_eq!(consumed, expected_blocks * HEADER_SIZE);
prop_assert_eq!(rs, real_size);
prop_assert_eq!(sparse_map.len(), entries.len());
for (i, &(off, len)) in entries.iter().enumerate() {
prop_assert_eq!(sparse_map[i].offset, off);
prop_assert_eq!(sparse_map[i].length, len);
}
}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
#[test]
fn test_sparse_need_data_then_retry(
n_ext_entries in 1usize..10,
) {
let inline = [(0u64, 100), (200, 100), (400, 100), (600, 100)];
let ext_entries: Vec<(u64, u64)> = (0..n_ext_entries)
.map(|i| (800 + i as u64 * 200, 100))
.collect();
let total = 4 + n_ext_entries;
let on_disk = total as u64 * 100;
let real_size = ext_entries.last().map(|(o, l)| o + l).unwrap_or(800);
let header = make_gnu_sparse_header(
b"retry_ext.txt",
&inline,
on_disk,
real_size,
true,
);
let ext = make_gnu_ext_sparse(&ext_entries, false);
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&header).unwrap();
assert!(matches!(event, ParseEvent::NeedData { .. }));
let mut full = Vec::new();
full.extend_from_slice(&header);
full.extend_from_slice(&ext);
full.extend(zeroes(on_disk.next_multiple_of(512) as usize));
full.extend(zeroes(1024));
let event = parser.parse(&full).unwrap();
match event {
ParseEvent::SparseEntry { sparse_map, .. } => {
prop_assert_eq!(sparse_map.len(), total);
}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
#[test]
fn test_pax_sparse_v00_roundtrip(
entries in sparse_map_strategy(15),
name_len in 1usize..50,
) {
let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
let mut pax_kv: Vec<(&str, Vec<u8>)> = Vec::new();
for &(offset, length) in &entries {
pax_kv.push(("GNU.sparse.offset", offset.to_string().into_bytes()));
pax_kv.push(("GNU.sparse.numbytes", length.to_string().into_bytes()));
}
pax_kv.push(("GNU.sparse.realsize", real_size.to_string().into_bytes()));
pax_kv.push(("GNU.sparse.name", name.clone()));
let pax_refs: Vec<(&str, &[u8])> =
pax_kv.iter().map(|(k, v)| (*k, v.as_slice())).collect();
let mut archive = Vec::new();
archive.extend(make_pax_header(&pax_refs));
archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
match event {
ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
prop_assert_eq!(&entry.path[..], &name[..]);
prop_assert_eq!(rs, real_size);
prop_assert_eq!(sparse_map.len(), entries.len());
for (i, &(off, len)) in entries.iter().enumerate() {
prop_assert_eq!(sparse_map[i].offset, off);
prop_assert_eq!(sparse_map[i].length, len);
}
}
ParseEvent::Entry { .. } if entries.is_empty() => {}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
#[test]
fn test_pax_sparse_v01_roundtrip(
entries in sparse_map_strategy(15),
name_len in 1usize..50,
) {
let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
let map_str: String = entries
.iter()
.flat_map(|(o, l)| [o.to_string(), l.to_string()])
.collect::<Vec<_>>()
.join(",");
let map_bytes = map_str.into_bytes();
let rs_bytes = real_size.to_string().into_bytes();
let pax_refs: Vec<(&str, &[u8])> = vec![
("GNU.sparse.map", &map_bytes),
("GNU.sparse.realsize", &rs_bytes),
("GNU.sparse.name", &name),
];
let mut archive = Vec::new();
archive.extend(make_pax_header(&pax_refs));
archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
match event {
ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
prop_assert_eq!(&entry.path[..], &name[..]);
prop_assert_eq!(rs, real_size);
prop_assert_eq!(sparse_map.len(), entries.len());
for (i, &(off, len)) in entries.iter().enumerate() {
prop_assert_eq!(sparse_map[i].offset, off);
prop_assert_eq!(sparse_map[i].length, len);
}
}
ParseEvent::Entry { .. } if entries.is_empty() => {}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
#[test]
fn test_pax_sparse_v10_roundtrip(
entries in sparse_map_strategy(20),
name_len in 1usize..50,
) {
let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
let mut map_data = format!("{}\n", entries.len());
for &(offset, length) in &entries {
map_data.push_str(&format!("{offset}\n{length}\n"));
}
let map_bytes = map_data.into_bytes();
let map_padded = map_bytes.len().next_multiple_of(HEADER_SIZE);
let total_size = map_padded as u64 + on_disk;
let rs_bytes = real_size.to_string().into_bytes();
let pax_refs: Vec<(&str, &[u8])> = vec![
("GNU.sparse.major", b"1"),
("GNU.sparse.minor", b"0"),
("GNU.sparse.realsize", &rs_bytes),
("GNU.sparse.name", &name),
];
let mut archive = Vec::new();
archive.extend(make_pax_header(&pax_refs));
archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
let mut data_block = vec![0u8; map_padded];
data_block[..map_bytes.len()].copy_from_slice(&map_bytes);
archive.extend_from_slice(&data_block);
archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
archive.extend(zeroes(1024));
let mut parser = Parser::new(Limits::default());
let event = parser.parse(&archive).unwrap();
match event {
ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
prop_assert_eq!(&entry.path[..], &name[..]);
prop_assert_eq!(rs, real_size);
prop_assert_eq!(entry.size, on_disk);
prop_assert_eq!(sparse_map.len(), entries.len());
for (i, &(off, len)) in entries.iter().enumerate() {
prop_assert_eq!(sparse_map[i].offset, off);
prop_assert_eq!(sparse_map[i].length, len);
}
}
other => {
return Err(proptest::test_runner::TestCaseError::fail(
format!("Expected SparseEntry, got {other:?}")));
}
}
}
}
}
/// Feeds the parser a GNU long-name entry followed by a PAX extension header
/// that claims close to `u32::MAX` bytes of payload, none of which is
/// present. The parser must either ask for more data (more than a single
/// header's worth) or return an error; any other outcome fails the test.
#[test]
fn test_add_consumed_no_overflow() {
    let long_name: &[u8] = b"a]long/path";
    let gnu_entry = make_gnu_long_name(long_name);

    // Sized so that the PAX payload, the long name and one block together
    // reach `u32::MAX`.
    let pax_size: u64 = u64::from(u32::MAX) - long_name.len() as u64 - 512;
    let pax_header = make_header(b"PaxHeaders/file", pax_size, b'x');

    let mut input = Vec::with_capacity(gnu_entry.len() + HEADER_SIZE);
    input.extend_from_slice(&gnu_entry);
    input.extend_from_slice(&pax_header);

    let mut parser = Parser::new(Limits::permissive());
    match parser.parse(&input) {
        Ok(ParseEvent::NeedData { min_bytes }) => assert!(
            min_bytes > HEADER_SIZE,
            "min_bytes should be large, got {min_bytes}"
        ),
        // Rejecting the truncated chain outright is equally acceptable.
        Err(_) => {}
        other => panic!(
            "Expected NeedData or Err for truncated extension chain, got {:?}",
            other
        ),
    }
}
}