use std::fs::File;
use std::io::BufReader;
use std::io::Read;
use std::path::Path;
use std::time::Instant;
use smallvec::SmallVec;
use tar::Archive;
use crate::ExtractionError;
use crate::ExtractionOptions;
use crate::ExtractionReport;
use crate::Result;
use crate::SecurityConfig;
use crate::copy::CopyBuffer;
use crate::security::validator::EntryValidator;
use crate::security::validator::ValidatedEntry;
use crate::security::validator::ValidatedEntryType;
use crate::types::DestDir;
use crate::types::EntryType;
use crate::types::SafePath;
use super::common;
use super::traits::ArchiveFormat;
/// Extractor for tar streams read from any [`Read`] source.
///
/// Construct it with [`TarArchive::new`].
pub struct TarArchive<R>
where
    R: Read,
{
    /// Underlying `tar` crate reader that yields the archive entries.
    inner: Archive<R>,
}
impl<R: Read> TarArchive<R> {
/// Creates a tar extractor that reads archive data from `reader`.
#[must_use]
pub fn new(reader: R) -> Self {
    let inner = Archive::new(reader);
    Self { inner }
}
/// Validates one tar entry and extracts it into `dest`.
///
/// Metadata-only entries (PAX headers and GNU long-name/long-link records)
/// are ignored. Files, directories and symlinks are written immediately.
/// Hardlinks are not created here: their validated link and target paths are
/// handed back to the caller instead.
///
/// # Returns
///
/// `Ok(Some(info))` for a hardlink entry, `Ok(None)` for every other entry.
///
/// # Errors
///
/// Returns `ExtractionError::InvalidArchive` when the entry path cannot be
/// read, and propagates any error from entry-type conversion, validation, or
/// the extraction helpers.
fn process_entry(
    entry: tar::Entry<'_, R>,
    validator: &mut EntryValidator,
    dest: &DestDir,
    report: &mut ExtractionReport,
    copy_buffer: &mut CopyBuffer,
    dir_cache: &mut common::DirCache,
    skip_duplicates: bool,
) -> Result<Option<HardlinkInfo>> {
    // Extension records carry no payload of their own to extract.
    if TarEntryAdapter::is_metadata_entry(&entry) {
        return Ok(None);
    }

    let entry_path = match entry.path() {
        Ok(raw) => raw.into_owned(),
        Err(e) => {
            return Err(ExtractionError::InvalidArchive(format!(
                "invalid path: {e}"
            )));
        }
    };
    let kind = TarEntryAdapter::to_entry_type(&entry)?;
    let declared_size = TarEntryAdapter::get_uncompressed_size(&entry);
    let mode = entry.header().mode().ok();

    let validated = validator.validate_entry(
        &entry_path,
        &kind,
        declared_size,
        None,
        mode,
        Some(dir_cache),
    )?;

    // Only hardlinks produce a value; everything else is handled on the spot.
    let deferred = match validated.entry_type {
        ValidatedEntryType::File => {
            Self::extract_file(
                entry,
                &validated,
                dest,
                report,
                copy_buffer,
                dir_cache,
                skip_duplicates,
            )?;
            None
        }
        ValidatedEntryType::Directory => {
            common::create_directory(&validated, dest, report, dir_cache)?;
            None
        }
        ValidatedEntryType::Symlink(safe_symlink) => {
            common::create_symlink(&safe_symlink, dest, report, dir_cache, skip_duplicates)?;
            None
        }
        ValidatedEntryType::Hardlink { target } => Some(HardlinkInfo {
            link_path: validated.safe_path,
            target_path: target,
        }),
    };
    Ok(deferred)
}
/// Writes the contents of a validated regular-file entry below `dest`.
///
/// Delegates to `common::extract_file_generic`, passing the entry itself as
/// the data source and the entry's size as the expected length.
///
/// # Errors
///
/// Propagates whatever `common::extract_file_generic` returns.
fn extract_file(
    mut entry: tar::Entry<'_, R>,
    validated: &ValidatedEntry,
    dest: &DestDir,
    report: &mut ExtractionReport,
    copy_buffer: &mut CopyBuffer,
    dir_cache: &mut common::DirCache,
    skip_duplicates: bool,
) -> Result<()> {
    let declared_size = entry.size();
    common::extract_file_generic(
        &mut entry,
        validated,
        dest,
        report,
        Some(declared_size),
        copy_buffer,
        dir_cache,
        skip_duplicates,
    )
}
/// Creates a previously collected hardlink inside `dest`.
///
/// The link is created with `std::fs::hard_link`, so no file data is
/// duplicated on disk and `report.bytes_written` is left untouched; only
/// `report.files_extracted` is incremented.
///
/// When the link path is already occupied, the entry is either skipped with
/// a warning (`skip_duplicates == true`) or rejected.
///
/// # Errors
///
/// * `ExtractionError::InvalidArchive` if the link target does not exist
///   below `dest`, or if the link path already exists and `skip_duplicates`
///   is `false`.
/// * Any error from creating the parent directory or the link itself.
fn create_hardlink(
    info: &HardlinkInfo,
    dest: &DestDir,
    report: &mut ExtractionReport,
    dir_cache: &mut common::DirCache,
    skip_duplicates: bool,
) -> Result<()> {
    let link_path = dest.join(&info.link_path);
    let target_path = dest.join(&info.target_path);

    if !target_path.exists() {
        return Err(ExtractionError::InvalidArchive(format!(
            "hardlink target does not exist: {}",
            info.target_path.as_path().display()
        )));
    }

    dir_cache.ensure_parent_dir(&link_path)?;

    // `symlink_metadata` does not follow symlinks, so a dangling symlink that
    // already occupies the link path is still detected as a duplicate.
    if std::fs::symlink_metadata(&link_path).is_ok() {
        if skip_duplicates {
            report.files_skipped += 1;
            report.warnings.push(format!(
                "skipped duplicate hardlink: {}",
                info.link_path.as_path().display()
            ));
            return Ok(());
        }
        return Err(ExtractionError::InvalidArchive(format!(
            "duplicate entry: {}",
            info.link_path.as_path().display()
        )));
    }

    // A real hardlink shares the target's data; copying would write a full
    // duplicate of the target for every link entry in the archive.
    std::fs::hard_link(&target_path, &link_path)?;
    report.files_extracted += 1;
    Ok(())
}
}
impl<R: Read> ArchiveFormat for TarArchive<R> {
fn extract(
&mut self,
output_dir: &Path,
config: &SecurityConfig,
options: &ExtractionOptions,
) -> Result<ExtractionReport> {
let start = Instant::now();
let skip_duplicates = options.skip_duplicates;
let dest = DestDir::new_or_create(output_dir.to_path_buf())?;
let mut validator = EntryValidator::new(config, &dest);
let mut report = ExtractionReport::new();
let mut hardlinks: SmallVec<[HardlinkInfo; 8]> = SmallVec::new();
let mut copy_buffer = CopyBuffer::new();
let mut dir_cache = common::DirCache::new();
let entries = self
.inner
.entries()
.map_err(|e| ExtractionError::InvalidArchive(format!("failed to read entries: {e}")))?;
for entry_result in entries {
let entry = entry_result.map_err(|e| {
let raw = ExtractionError::InvalidArchive(format!("failed to read entry: {e}"));
if report.total_items() > 0 {
ExtractionError::PartialExtraction {
source: Box::new(raw),
report: std::mem::take(&mut report),
}
} else {
raw
}
})?;
match Self::process_entry(
entry,
&mut validator,
&dest,
&mut report,
&mut copy_buffer,
&mut dir_cache,
skip_duplicates,
) {
Ok(Some(hardlink_info)) => hardlinks.push(hardlink_info),
Ok(None) => {}
Err(e) => {
return Err(if report.total_items() > 0 {
ExtractionError::PartialExtraction {
source: Box::new(e),
report: std::mem::take(&mut report),
}
} else {
e
});
}
}
}
for hardlink_info in &hardlinks {
if let Err(e) = Self::create_hardlink(
hardlink_info,
&dest,
&mut report,
&mut dir_cache,
skip_duplicates,
) {
return Err(if report.total_items() > 0 {
ExtractionError::PartialExtraction {
source: Box::new(e),
report: std::mem::take(&mut report),
}
} else {
e
});
}
}
report.duration = start.elapsed();
Ok(report)
}
/// Returns the short identifier of this archive format: `"tar"`.
fn format_name(&self) -> &'static str {
    const FORMAT: &str = "tar";
    FORMAT
}
}
/// Validated link/target path pair for a hardlink entry whose creation is
/// deferred until the rest of the archive has been processed.
// NOTE(review): both fields are read by `TarArchive::create_hardlink`, so the
// `dead_code` allowance looks unnecessary — confirm before removing it.
#[allow(dead_code)] struct HardlinkInfo {
// Path at which the link itself is created.
link_path: SafePath,
// Path of the existing entry the link refers to.
target_path: SafePath,
}
/// Namespace for helpers that inspect `tar::Entry` headers: metadata-entry
/// detection, entry-type conversion, and size lookup.
struct TarEntryAdapter;
impl TarEntryAdapter {
/// Returns `true` when the entry is a PAX header (local or global) or a GNU
/// long-name / long-link record.
fn is_metadata_entry<R: Read>(tar_entry: &tar::Entry<'_, R>) -> bool {
    use tar::EntryType as TarType;

    let kind = tar_entry.header().entry_type();
    matches!(
        kind,
        TarType::XHeader
            | TarType::XGlobalHeader
            | TarType::GNULongName
            | TarType::GNULongLink
    )
}
/// Maps the tar header type of `tar_entry` to the crate's [`EntryType`].
///
/// Regular, contiguous and GNU sparse entries become `EntryType::File`;
/// directories, symlinks and hardlinks map to their counterparts, with the
/// link target read from the header.
///
/// # Errors
///
/// * `ExtractionError::InvalidArchive` if a symlink or hardlink target cannot
///   be read or is absent.
/// * `ExtractionError::SecurityViolation` for character devices, block
///   devices, FIFOs and every other entry type.
fn to_entry_type<R: Read>(tar_entry: &tar::Entry<'_, R>) -> Result<EntryType> {
    use tar::EntryType as TarType;

    let kind = tar_entry.header().entry_type();
    match kind {
        TarType::Regular | TarType::Continuous | TarType::GNUSparse => Ok(EntryType::File),
        TarType::Directory => Ok(EntryType::Directory),
        TarType::Symlink => {
            let link = tar_entry.link_name().map_err(|e| {
                ExtractionError::InvalidArchive(format!("failed to read symlink name: {e}"))
            })?;
            match link {
                Some(name) => Ok(EntryType::Symlink {
                    target: name.into_owned(),
                }),
                None => Err(ExtractionError::InvalidArchive(
                    "symlink missing target".into(),
                )),
            }
        }
        TarType::Link => {
            let link = tar_entry.link_name().map_err(|e| {
                ExtractionError::InvalidArchive(format!("failed to read hardlink name: {e}"))
            })?;
            match link {
                Some(name) => Ok(EntryType::Hardlink {
                    target: name.into_owned(),
                }),
                None => Err(ExtractionError::InvalidArchive(
                    "hardlink missing target".into(),
                )),
            }
        }
        TarType::Char => Err(ExtractionError::SecurityViolation {
            reason: "character device entries not supported".into(),
        }),
        TarType::Block => Err(ExtractionError::SecurityViolation {
            reason: "block device entries not supported".into(),
        }),
        TarType::Fifo => Err(ExtractionError::SecurityViolation {
            reason: "FIFO entries not supported".into(),
        }),
        other => Err(ExtractionError::SecurityViolation {
            reason: format!("unsupported entry type: {:?}", other),
        }),
    }
}
fn get_uncompressed_size<R: Read>(tar_entry: &tar::Entry<'_, R>) -> u64 {
tar_entry.size()
}
}
/// Opens the file at `path` as a gzip-compressed tar archive.
///
/// The file is read through a `BufReader` wrapped in a gzip decoder.
///
/// # Errors
///
/// Returns an error if the file cannot be opened.
pub fn open_tar_gz<P: AsRef<Path>>(
    path: P,
) -> Result<TarArchive<flate2::read::GzDecoder<BufReader<File>>>> {
    let reader = BufReader::new(File::open(path)?);
    Ok(TarArchive::new(flate2::read::GzDecoder::new(reader)))
}
/// Opens the file at `path` as a bzip2-compressed tar archive.
///
/// The file is read through a `BufReader` wrapped in a bzip2 decoder.
///
/// # Errors
///
/// Returns an error if the file cannot be opened.
pub fn open_tar_bz2<P: AsRef<Path>>(
    path: P,
) -> Result<TarArchive<bzip2::read::BzDecoder<BufReader<File>>>> {
    let reader = BufReader::new(File::open(path)?);
    Ok(TarArchive::new(bzip2::read::BzDecoder::new(reader)))
}
/// Opens the file at `path` as an xz-compressed tar archive.
///
/// The file is read through a `BufReader` wrapped in an xz decoder.
///
/// # Errors
///
/// Returns an error if the file cannot be opened.
pub fn open_tar_xz<P: AsRef<Path>>(
    path: P,
) -> Result<TarArchive<xz2::read::XzDecoder<BufReader<File>>>> {
    let reader = BufReader::new(File::open(path)?);
    Ok(TarArchive::new(xz2::read::XzDecoder::new(reader)))
}
/// Opens the file at `path` as a zstd-compressed tar archive.
///
/// The file is read through a `BufReader` handed to the zstd decoder.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or the zstd decoder cannot
/// be constructed.
pub fn open_tar_zst<P: AsRef<Path>>(
    path: P,
) -> Result<TarArchive<zstd::Decoder<'static, BufReader<File>>>> {
    let reader = BufReader::new(File::open(path)?);
    let decoder = zstd::Decoder::with_buffer(reader)?;
    Ok(TarArchive::new(decoder))
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::test_utils::create_test_tar;
use std::io::Cursor;
use std::io::Write;
use tempfile::TempDir;
/// A freshly constructed `TarArchive` reports `"tar"` as its format name.
#[test]
fn test_tar_archive_new() {
    let bytes = create_test_tar(vec![]);
    let extractor = TarArchive::new(Cursor::new(bytes));
    assert_eq!(extractor.format_name(), "tar");
}
/// A single regular file is extracted and no directory is counted.
#[test]
fn test_extract_simple_file() {
    let bytes = create_test_tar(vec![("file.txt", b"hello world")]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert_eq!(summary.directories_created, 0);
    assert!(out_dir.path().join("file.txt").exists());
}
/// Two nested directory entries followed by a file inside them yield two
/// created directories and one extracted file.
#[test]
fn test_extract_nested_structure() {
    let mut builder = tar::Builder::new(Vec::new());

    for dir_name in ["dir1/", "dir1/dir2/"] {
        let mut dir_header = tar::Header::new_gnu();
        dir_header.set_size(0);
        dir_header.set_mode(0o755);
        dir_header.set_entry_type(tar::EntryType::Directory);
        dir_header.set_cksum();
        builder
            .append_data(&mut dir_header, dir_name, &[] as &[u8])
            .unwrap();
    }

    let mut file_header = tar::Header::new_gnu();
    file_header.set_size(6);
    file_header.set_mode(0o644);
    file_header.set_cksum();
    builder
        .append_data(&mut file_header, "dir1/dir2/file.txt", &b"nested"[..])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert_eq!(summary.directories_created, 2);
    assert!(out_dir.path().join("dir1/dir2/file.txt").exists());
}
/// With symlinks allowed, a symlink pointing at a sibling file is created
/// and counted separately from the extracted file.
#[test]
#[cfg(unix)]
fn test_extract_symlink() {
    let mut builder = tar::Builder::new(Vec::new());

    let mut file_header = tar::Header::new_gnu();
    file_header.set_size(5);
    file_header.set_mode(0o644);
    file_header.set_entry_type(tar::EntryType::Regular);
    file_header.set_cksum();
    builder
        .append_data(&mut file_header, "target.txt", &b"data\n"[..])
        .unwrap();

    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Symlink);
    link_header.set_link_name("target.txt").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();
    builder
        .append_data(&mut link_header, "link.txt", &[] as &[u8])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let mut policy = SecurityConfig::default();
    policy.allowed.symlinks = true;

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert_eq!(summary.symlinks_created, 1);
    assert!(out_dir.path().join("link.txt").exists());
}
/// A hardlink entry that precedes its target in the archive still succeeds,
/// because hardlinks are created after all other entries.
#[test]
#[cfg(unix)]
fn test_extract_hardlink_two_pass() {
    let mut builder = tar::Builder::new(Vec::new());

    // The link comes first on purpose.
    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Link);
    link_header.set_link_name("target.txt").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();
    builder
        .append_data(&mut link_header, "hardlink.txt", &[] as &[u8])
        .unwrap();

    let mut file_header = tar::Header::new_gnu();
    file_header.set_size(5);
    file_header.set_mode(0o644);
    file_header.set_entry_type(tar::EntryType::Regular);
    file_header.set_cksum();
    builder
        .append_data(&mut file_header, "target.txt", &b"data\n"[..])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let mut policy = SecurityConfig::default();
    policy.allowed.hardlinks = true;

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 2);
    assert!(out_dir.path().join("hardlink.txt").exists());
    assert!(out_dir.path().join("target.txt").exists());
}
/// A 1000-byte file is rejected when `max_file_size` is 100.
#[test]
fn test_quota_file_size_exceeded() {
    let bytes = create_test_tar(vec![("large.bin", &vec![0u8; 1000])]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig {
        max_file_size: 100,
        ..Default::default()
    };

    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// Block device entries make extraction fail.
#[test]
fn test_unsupported_entry_type_block_device() {
    let mut device_header = tar::Header::new_gnu();
    device_header.set_entry_type(tar::EntryType::Block);
    device_header.set_size(0);
    device_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut device_header, "dev/sda", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// Character device entries make extraction fail.
#[test]
fn test_unsupported_entry_type_char_device() {
    let mut device_header = tar::Header::new_gnu();
    device_header.set_entry_type(tar::EntryType::Char);
    device_header.set_size(0);
    device_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut device_header, "dev/tty", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// FIFO entries make extraction fail.
#[test]
fn test_unsupported_entry_type_fifo() {
    let mut fifo_header = tar::Header::new_gnu();
    fifo_header.set_entry_type(tar::EntryType::Fifo);
    fifo_header.set_size(0);
    fifo_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut fifo_header, "fifo", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// PAX global and local extended headers in front of a regular file do not
/// produce output of their own; only the regular file is extracted.
#[test]
fn test_extract_pax_headers_skipped() {
    // A PAX record is "<len> <key>=<value>\n" where <len> counts the whole
    // record including its own digits: "14 comment=hi\n" is 14 bytes long.
    let pax_data = b"14 comment=hi\n";

    let mut builder = tar::Builder::new(Vec::new());

    let mut header = tar::Header::new_gnu();
    header.set_entry_type(tar::EntryType::XGlobalHeader);
    header.set_size(pax_data.len() as u64);
    header.set_cksum();
    builder
        .append_data(&mut header, "././@PaxHeader", &pax_data[..])
        .unwrap();

    let mut header = tar::Header::new_gnu();
    header.set_entry_type(tar::EntryType::XHeader);
    header.set_size(pax_data.len() as u64);
    header.set_cksum();
    builder
        .append_data(&mut header, "././@PaxHeader", &pax_data[..])
        .unwrap();

    let mut header = tar::Header::new_gnu();
    header.set_size(5);
    header.set_mode(0o644);
    header.set_cksum();
    builder
        .append_data(&mut header, "hello.txt", &b"hello"[..])
        .unwrap();

    let tar_data = builder.into_inner().unwrap();
    let mut archive = TarArchive::new(Cursor::new(tar_data));
    let temp = TempDir::new().unwrap();
    let config = SecurityConfig::default();

    let report = archive
        .extract(temp.path(), &config, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(report.files_extracted, 1);
    assert!(temp.path().join("hello.txt").exists());
}
/// A GNU long-name record is not extracted as an entry of its own; the name
/// it carries is applied to the following file, which is the only file
/// extracted.
#[test]
fn test_extract_gnu_long_name_skipped() {
    let long_name = b"very_long_filename.txt";
    let mut builder = tar::Builder::new(Vec::new());

    let mut name_header = tar::Header::new_gnu();
    name_header.set_entry_type(tar::EntryType::GNULongName);
    name_header.set_size(long_name.len() as u64);
    name_header.set_cksum();
    builder
        .append_data(&mut name_header, "././@LongLink", &long_name[..])
        .unwrap();

    let mut file_header = tar::Header::new_gnu();
    file_header.set_size(4);
    file_header.set_mode(0o644);
    file_header.set_cksum();
    builder
        .append_data(&mut file_header, "file.txt", &b"data"[..])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("very_long_filename.txt").exists());
}
/// A GNU long-link record in front of a regular file is not extracted as an
/// entry of its own; the regular file keeps its name.
#[test]
fn test_extract_gnu_long_link_skipped() {
    let long_link = b"target.txt";
    let mut builder = tar::Builder::new(Vec::new());

    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::GNULongLink);
    link_header.set_size(long_link.len() as u64);
    link_header.set_cksum();
    builder
        .append_data(&mut link_header, "././@LongLink", &long_link[..])
        .unwrap();

    let mut file_header = tar::Header::new_gnu();
    file_header.set_size(4);
    file_header.set_mode(0o644);
    file_header.set_cksum();
    builder
        .append_data(&mut file_header, "file.txt", &b"data"[..])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("file.txt").exists());
}
/// A GNU sparse entry is either extracted as a regular file or rejected as
/// an invalid archive; any other error fails the test.
#[test]
fn test_extract_gnu_sparse_as_file() {
    let mut sparse_header = tar::Header::new_gnu();
    sparse_header.set_entry_type(tar::EntryType::GNUSparse);
    sparse_header.set_size(6);
    sparse_header.set_mode(0o644);
    sparse_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut sparse_header, "sparse.txt", &b"sparse"[..])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    match outcome {
        Ok(summary) => {
            assert_eq!(summary.files_extracted, 1);
            assert!(out_dir.path().join("sparse.txt").exists());
        }
        // Both outcomes are accepted by this test.
        Err(ExtractionError::InvalidArchive(_)) => {}
        Err(e) => panic!("unexpected error for GNUSparse entry: {e}"),
    }
}
/// An entry whose type byte is not a known tar type is rejected with
/// `ExtractionError::SecurityViolation`.
#[test]
fn test_unsupported_entry_type_unknown_byte() {
    let mut builder = tar::Builder::new(Vec::new());
    let mut header = tar::Header::new_gnu();
    // Build the unknown type through the public constructor instead of the
    // doc-hidden `__Nonexhaustive` variant.
    header.set_entry_type(tar::EntryType::new(b'Z'));
    header.set_size(0);
    header.set_cksum();
    builder
        .append_data(&mut header, "unknown", &[] as &[u8])
        .unwrap();
    let tar_data = builder.into_inner().unwrap();

    let mut archive = TarArchive::new(Cursor::new(tar_data));
    let temp = TempDir::new().unwrap();
    let config = SecurityConfig::default();
    let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

    assert!(
        matches!(result, Err(ExtractionError::SecurityViolation { .. })),
        "expected SecurityViolation, got: {result:?}"
    );
}
/// A contiguous-file entry is extracted like a regular file.
#[test]
fn test_extract_continuous_entry_as_file() {
    let mut file_header = tar::Header::new_gnu();
    file_header.set_entry_type(tar::EntryType::Continuous);
    file_header.set_size(7);
    file_header.set_mode(0o644);
    file_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut file_header, "cont.txt", &b"content"[..])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("cont.txt").exists());
}
/// A tar stream wrapped in gzip is extracted through a `GzDecoder`.
#[test]
fn test_extract_gzip_compressed() {
    use flate2::Compression;
    use flate2::write::GzEncoder;

    let plain = create_test_tar(vec![("file.txt", b"compressed")]);
    let mut gz_writer = GzEncoder::new(Vec::new(), Compression::default());
    gz_writer.write_all(&plain).unwrap();
    let compressed = gz_writer.finish().unwrap();

    let mut extractor = TarArchive::new(flate2::read::GzDecoder::new(Cursor::new(compressed)));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("file.txt").exists());
}
/// A tar stream wrapped in bzip2 is extracted through a `BzDecoder`.
#[test]
fn test_extract_bzip2_compressed() {
    use bzip2::Compression;
    use bzip2::write::BzEncoder;

    let plain = create_test_tar(vec![("file.txt", b"compressed")]);
    let mut bz_writer = BzEncoder::new(Vec::new(), Compression::default());
    bz_writer.write_all(&plain).unwrap();
    let compressed = bz_writer.finish().unwrap();

    let mut extractor = TarArchive::new(bzip2::read::BzDecoder::new(Cursor::new(compressed)));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("file.txt").exists());
}
/// A tar stream wrapped in xz is extracted through an `XzDecoder`.
#[test]
fn test_extract_xz_compressed() {
    use xz2::write::XzEncoder;

    let plain = create_test_tar(vec![("file.txt", b"compressed")]);
    let mut xz_writer = XzEncoder::new(Vec::new(), 6);
    xz_writer.write_all(&plain).unwrap();
    let compressed = xz_writer.finish().unwrap();

    let mut extractor = TarArchive::new(xz2::read::XzDecoder::new(Cursor::new(compressed)));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("file.txt").exists());
}
/// A tar stream compressed with zstd is extracted through a `zstd::Decoder`.
#[test]
fn test_extract_zstd_compressed() {
    let plain = create_test_tar(vec![("file.txt", b"compressed")]);
    let compressed = zstd::encode_all(&plain[..], 3).unwrap();

    let zstd_reader = zstd::Decoder::with_buffer(Cursor::new(compressed)).unwrap();
    let mut extractor = TarArchive::new(zstd_reader);
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("file.txt").exists());
}
/// An archive without entries extracts successfully with all-zero counters.
#[test]
fn test_empty_tar_archive() {
    let bytes = create_test_tar(vec![]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 0);
    assert_eq!(summary.directories_created, 0);
}
/// A zero-length file entry is created on disk with length 0.
#[test]
fn test_extract_empty_file() {
    let bytes = create_test_tar(vec![("empty.txt", b"")]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    assert!(out_dir.path().join("empty.txt").exists());
    let on_disk = std::fs::metadata(out_dir.path().join("empty.txt")).unwrap();
    assert_eq!(on_disk.len(), 0);
}
/// Three regular files in one archive are all extracted.
#[test]
fn test_extract_multiple_files() {
    let bytes = create_test_tar(vec![
        ("file1.txt", b"content1"),
        ("file2.txt", b"content2"),
        ("file3.txt", b"content3"),
    ]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 3);
    assert!(out_dir.path().join("file1.txt").exists());
    assert!(out_dir.path().join("file2.txt").exists());
    assert!(out_dir.path().join("file3.txt").exists());
}
/// Three files exceed a `max_file_count` of 2, so extraction fails.
#[test]
fn test_quota_file_count_exceeded() {
    let bytes = create_test_tar(vec![
        ("file1.txt", b"a"),
        ("file2.txt", b"b"),
        ("file3.txt", b"c"),
    ]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig {
        max_file_count: 2,
        ..Default::default()
    };

    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// Files of 500 and 600 bytes exceed a `max_total_size` of 1000, so
/// extraction fails.
#[test]
fn test_quota_total_size_exceeded() {
    let bytes = create_test_tar(vec![
        ("file1.txt", &vec![0u8; 500]),
        ("file2.txt", &vec![0u8; 600]),
    ]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig {
        max_total_size: 1000,
        ..Default::default()
    };

    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// The permission bits stored in the tar header (0o755) are applied to the
/// extracted file, and the file content is written in full.
#[test]
#[cfg(unix)]
fn test_file_permissions_preserved() {
    use std::os::unix::fs::PermissionsExt;

    let script: &[u8] = b"#!/bin/sh";

    let mut builder = tar::Builder::new(Vec::new());
    let mut header = tar::Header::new_gnu();
    // The declared size must match the payload; a shorter size would make the
    // reader truncate the file.
    header.set_size(script.len() as u64);
    header.set_mode(0o755);
    header.set_cksum();
    builder
        .append_data(&mut header, "script.sh", script)
        .unwrap();
    let tar_data = builder.into_inner().unwrap();

    let mut archive = TarArchive::new(Cursor::new(tar_data));
    let temp = TempDir::new().unwrap();
    let config = SecurityConfig::default();
    let report = archive
        .extract(temp.path(), &config, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(report.files_extracted, 1);
    let extracted = temp.path().join("script.sh");
    let metadata = std::fs::metadata(&extracted).unwrap();
    let permissions = metadata.permissions();
    assert_eq!(permissions.mode() & 0o777, 0o755);
    assert_eq!(std::fs::read(&extracted).unwrap(), script);
}
/// A header mode of 0o4755 results in an extracted file with mode 0o755:
/// the setuid bit is dropped under the default configuration.
#[test]
#[cfg(unix)]
fn test_permissions_sanitized_setuid_removed() {
    use std::os::unix::fs::PermissionsExt;

    let mut setuid_header = tar::Header::new_gnu();
    setuid_header.set_size(4);
    setuid_header.set_mode(0o4755);
    setuid_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut setuid_header, "binary", &b"data"[..])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 1);
    let mode = std::fs::metadata(out_dir.path().join("binary"))
        .unwrap()
        .permissions()
        .mode();
    assert_eq!(mode & 0o7777, 0o755);
}
/// Two 5-byte files add up to `bytes_written == 10` in the report.
#[test]
fn test_bytes_written_tracking() {
    let bytes = create_test_tar(vec![("file1.txt", b"12345"), ("file2.txt", b"67890")]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.bytes_written, 10);
}
/// An archive holding only a directory entry creates that directory and
/// extracts no files.
#[test]
fn test_extract_directory_only() {
    let mut dir_header = tar::Header::new_gnu();
    dir_header.set_size(0);
    dir_header.set_mode(0o755);
    dir_header.set_entry_type(tar::EntryType::Directory);
    dir_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut dir_header, "mydir/", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert_eq!(summary.files_extracted, 0);
    assert_eq!(summary.directories_created, 1);
    assert!(out_dir.path().join("mydir").is_dir());
}
/// With the default configuration a symlink entry makes extraction fail.
#[test]
#[cfg(unix)]
fn test_symlink_disabled_by_default() {
    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Symlink);
    link_header.set_link_name("target.txt").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut link_header, "link.txt", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// With the default configuration a hardlink entry makes extraction fail.
#[test]
#[cfg(unix)]
fn test_hardlink_disabled_by_default() {
    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Link);
    link_header.set_link_name("target.txt").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut link_header, "hardlink.txt", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
}
/// A successful extraction records a non-zero duration in the report.
#[test]
fn test_extraction_duration_recorded() {
    let bytes = create_test_tar(vec![("file.txt", b"test")]);
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();

    let summary = extractor
        .extract(out_dir.path(), &policy, &ExtractionOptions::default())
        .unwrap();

    assert!(summary.duration.as_nanos() > 0);
}
/// An entry named `subdir/../etc/passwd` is rejected with
/// `ExtractionError::PathTraversal`.
///
/// The name is written straight into the raw header field, bypassing the
/// builder's path handling.
#[test]
fn test_path_traversal_via_dotdot_rejected() {
    let evil_name = b"subdir/../etc/passwd";
    let mut raw_name = [0u8; 100];
    raw_name[..evil_name.len()].copy_from_slice(evil_name);

    let mut evil_header = tar::Header::new_gnu();
    evil_header.set_size(5);
    evil_header.set_mode(0o644);
    evil_header.as_gnu_mut().unwrap().name = raw_name;
    evil_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder.append(&evil_header, &b"evil\n"[..]).unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
    match outcome {
        Err(ExtractionError::PathTraversal { .. }) => {}
        _ => panic!("Expected PathTraversal error"),
    }
}
/// An entry with the absolute name `/etc/shadow` is rejected with
/// `ExtractionError::PathTraversal`.
///
/// The name is written straight into the raw header field, bypassing the
/// builder's path handling.
#[test]
fn test_absolute_path_rejected() {
    let evil_name = b"/etc/shadow";
    let mut raw_name = [0u8; 100];
    raw_name[..evil_name.len()].copy_from_slice(evil_name);

    let mut evil_header = tar::Header::new_gnu();
    evil_header.set_size(5);
    evil_header.set_mode(0o644);
    evil_header.as_gnu_mut().unwrap().name = raw_name;
    evil_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder.append(&evil_header, &b"evil\n"[..]).unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let policy = SecurityConfig::default();
    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
    match outcome {
        Err(ExtractionError::PathTraversal { .. }) => {}
        _ => panic!("Expected PathTraversal error for absolute path"),
    }
}
/// A symlink whose target climbs out of the destination is rejected with
/// `SymlinkEscape`, either directly or wrapped in `PartialExtraction`.
#[test]
#[cfg(unix)]
fn test_symlink_escape_rejected() {
    let mut builder = tar::Builder::new(Vec::new());

    let mut dir_header = tar::Header::new_gnu();
    dir_header.set_size(0);
    dir_header.set_mode(0o755);
    dir_header.set_entry_type(tar::EntryType::Directory);
    dir_header.set_cksum();
    builder
        .append_data(&mut dir_header, "subdir/", &[] as &[u8])
        .unwrap();

    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Symlink);
    link_header.set_link_name("../../etc/passwd").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();
    builder
        .append_data(&mut link_header, "subdir/evil_link.txt", &[] as &[u8])
        .unwrap();

    let bytes = builder.into_inner().unwrap();
    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let mut policy = SecurityConfig::default();
    policy.allowed.symlinks = true;

    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
    match outcome {
        Err(ExtractionError::PartialExtraction { source, .. }) => {
            assert!(matches!(*source, ExtractionError::SymlinkEscape { .. }));
        }
        Err(ExtractionError::SymlinkEscape { .. }) => {}
        other => panic!("Expected SymlinkEscape error for symlink escape, got: {other:?}"),
    }
}
/// A hardlink whose target is not part of the archive fails with an
/// `InvalidArchive` error naming the missing target.
#[test]
#[cfg(unix)]
fn test_hardlink_target_missing_error() {
    let mut link_header = tar::Header::new_gnu();
    link_header.set_entry_type(tar::EntryType::Link);
    link_header.set_link_name("nonexistent.txt").unwrap();
    link_header.set_size(0);
    link_header.set_cksum();

    let mut builder = tar::Builder::new(Vec::new());
    builder
        .append_data(&mut link_header, "hardlink.txt", &[] as &[u8])
        .unwrap();
    let bytes = builder.into_inner().unwrap();

    let mut extractor = TarArchive::new(Cursor::new(bytes));
    let out_dir = TempDir::new().unwrap();
    let mut policy = SecurityConfig::default();
    policy.allowed.hardlinks = true;

    let outcome = extractor.extract(out_dir.path(), &policy, &ExtractionOptions::default());

    assert!(outcome.is_err());
    match outcome {
        Err(ExtractionError::InvalidArchive(msg)) => {
            assert!(msg.contains("hardlink target does not exist"));
        }
        _ => panic!("Expected InvalidArchive error for missing hardlink target"),
    }
}
/// Extracts one regular file plus seven hardlinks to it and checks that every
/// link exists afterwards.
///
/// NOTE(review): the test name suggests seven links stay within the inline
/// capacity of the collection used for deferred hardlinks — confirm against
/// the extractor.
#[test]
#[cfg(unix)]
fn test_hardlink_collection_stack_allocation() {
    let link_names: Vec<String> = (0..7).map(|i| format!("link{i}.txt")).collect();
    let mut tar_builder = tar::Builder::new(Vec::new());

    // Regular file that all hardlinks refer to.
    let mut target_header = tar::Header::new_gnu();
    target_header.set_entry_type(tar::EntryType::Regular);
    target_header.set_mode(0o644);
    target_header.set_size(5);
    target_header.set_cksum();
    tar_builder
        .append_data(&mut target_header, "target.txt", b"data\n".as_slice())
        .unwrap();

    for name in &link_names {
        let mut link_header = tar::Header::new_gnu();
        link_header.set_entry_type(tar::EntryType::Link);
        link_header.set_link_name("target.txt").unwrap();
        link_header.set_size(0);
        link_header.set_cksum();
        tar_builder
            .append_data(&mut link_header, name, std::io::empty())
            .unwrap();
    }

    let mut archive = TarArchive::new(Cursor::new(tar_builder.into_inner().unwrap()));
    let dest = TempDir::new().unwrap();
    let mut security = SecurityConfig::default();
    security.allowed.hardlinks = true;

    let report = archive
        .extract(dest.path(), &security, &ExtractionOptions::default())
        .unwrap();

    // One regular file plus seven hardlinks.
    assert_eq!(report.files_extracted, 8);
    for name in &link_names {
        assert!(dest.path().join(name).exists());
    }
}
/// Extracts one regular file plus twenty hardlinks to it and checks that
/// every link exists afterwards.
///
/// NOTE(review): the test name suggests twenty links exceed the inline
/// capacity of the collection used for deferred hardlinks — confirm against
/// the extractor.
#[test]
#[cfg(unix)]
fn test_hardlink_collection_heap_spillover() {
    let link_names: Vec<String> = (0..20).map(|i| format!("link{i}.txt")).collect();
    let mut tar_builder = tar::Builder::new(Vec::new());

    // Regular file that all hardlinks refer to.
    let mut target_header = tar::Header::new_gnu();
    target_header.set_entry_type(tar::EntryType::Regular);
    target_header.set_mode(0o644);
    target_header.set_size(5);
    target_header.set_cksum();
    tar_builder
        .append_data(&mut target_header, "target.txt", b"data\n".as_slice())
        .unwrap();

    for name in &link_names {
        let mut link_header = tar::Header::new_gnu();
        link_header.set_entry_type(tar::EntryType::Link);
        link_header.set_link_name("target.txt").unwrap();
        link_header.set_size(0);
        link_header.set_cksum();
        tar_builder
            .append_data(&mut link_header, name, std::io::empty())
            .unwrap();
    }

    let mut archive = TarArchive::new(Cursor::new(tar_builder.into_inner().unwrap()));
    let dest = TempDir::new().unwrap();
    let mut security = SecurityConfig::default();
    security.allowed.hardlinks = true;

    let report = archive
        .extract(dest.path(), &security, &ExtractionOptions::default())
        .unwrap();

    // One regular file plus twenty hardlinks.
    assert_eq!(report.files_extracted, 21);
    for name in &link_names {
        assert!(dest.path().join(name).exists());
    }
}
/// Hand-assembles a tar stream in which a PAX extended header overrides the
/// size of the following file entry.
///
/// The stream consists of:
/// 1. a PAX extended header entry whose payload is a single `size=<pax_size>` record,
/// 2. a regular-file header for `filename` that declares a size of 0,
/// 3. the raw bytes of `data`, zero-padded to a 512-byte boundary,
/// 4. 1024 zero bytes (two empty blocks) terminating the archive.
fn create_tar_with_pax_size_override(filename: &str, pax_size: u64, data: &[u8]) -> Vec<u8> {
    // Zero bytes required to round `len` up to the next 512-byte block.
    fn block_padding(len: usize) -> usize {
        (512 - len % 512) % 512
    }

    // A PAX record has the form "<len> key=value\n", where <len> is the length
    // of the whole record including its own decimal digits. Iterate until the
    // length is consistent with its own digit count.
    let record_tail = format!(" size={pax_size}\n");
    let mut record_len = record_tail.len() + 1;
    while record_len.to_string().len() + record_tail.len() != record_len {
        record_len = record_len.to_string().len() + record_tail.len();
    }
    let pax_record = format!("{record_len}{record_tail}");
    let pax_payload = pax_record.as_bytes();

    let mut archive_bytes = Vec::new();

    // Extended header entry carrying the size override.
    let mut pax_header = tar::Header::new_ustar();
    pax_header.set_entry_type(tar::EntryType::XHeader);
    pax_header.set_size(pax_payload.len() as u64);
    pax_header.set_mode(0o644);
    pax_header.set_path("././@PaxHeader").unwrap();
    pax_header.set_cksum();
    archive_bytes.extend_from_slice(pax_header.as_bytes());
    archive_bytes.extend_from_slice(pax_payload);
    archive_bytes.resize(archive_bytes.len() + block_padding(pax_payload.len()), 0);

    // File entry: the header itself claims zero bytes, yet `data` follows it.
    let mut file_header = tar::Header::new_ustar();
    file_header.set_entry_type(tar::EntryType::Regular);
    file_header.set_size(0);
    file_header.set_mode(0o644);
    file_header.set_path(filename).unwrap();
    file_header.set_cksum();
    archive_bytes.extend_from_slice(file_header.as_bytes());
    archive_bytes.extend_from_slice(data);
    archive_bytes.resize(archive_bytes.len() + block_padding(data.len()), 0);

    // End-of-archive marker.
    archive_bytes.resize(archive_bytes.len() + 1024, 0);
    archive_bytes
}
/// Regression test: a 2 MiB file whose size is declared only through a PAX
/// `size` record must still be rejected when `max_file_size` is 1 MiB.
#[test]
fn test_pax_size_override_bypasses_max_file_size_quota() {
    const PAX_SIZE: u64 = 2 * 1024 * 1024;

    let payload = vec![0u8; usize::try_from(PAX_SIZE).unwrap()];
    let mut archive = TarArchive::new(Cursor::new(create_tar_with_pax_size_override(
        "big.bin", PAX_SIZE, &payload,
    )));
    let dest = TempDir::new().unwrap();

    // Per-file limit is half the payload size; everything else stays default.
    let limits = SecurityConfig {
        max_file_size: 1024 * 1024,
        ..SecurityConfig::default()
    };

    let result = archive.extract(dest.path(), &limits, &ExtractionOptions::default());
    assert!(
        result.is_err(),
        "expected quota error for PAX file size override, got: {result:?}"
    );
}
/// Regression test: a 600 KiB file whose size is declared only through a PAX
/// `size` record must still be rejected when `max_total_size` is 500 KiB.
#[test]
fn test_pax_size_override_bypasses_max_total_size_quota() {
    const PAX_SIZE: u64 = 600 * 1024;

    let payload = vec![0u8; usize::try_from(PAX_SIZE).unwrap()];
    let mut archive = TarArchive::new(Cursor::new(create_tar_with_pax_size_override(
        "big.bin", PAX_SIZE, &payload,
    )));
    let dest = TempDir::new().unwrap();

    // Total-size limit is below the payload size; everything else stays default.
    let limits = SecurityConfig {
        max_total_size: 500 * 1024,
        ..SecurityConfig::default()
    };

    let result = archive.extract(dest.path(), &limits, &ExtractionOptions::default());
    assert!(
        result.is_err(),
        "expected quota error for PAX total size override, got: {result:?}"
    );
}
/// Extracts one regular file plus eight hardlinks to it and checks that every
/// link exists afterwards.
///
/// NOTE(review): the test name suggests eight links sit exactly at the inline
/// capacity of the collection used for deferred hardlinks — confirm against
/// the extractor.
#[test]
#[cfg(unix)]
fn test_hardlink_collection_boundary() {
    let link_names: Vec<String> = (0..8).map(|i| format!("link{i}.txt")).collect();
    let mut tar_builder = tar::Builder::new(Vec::new());

    // Regular file that all hardlinks refer to.
    let mut target_header = tar::Header::new_gnu();
    target_header.set_entry_type(tar::EntryType::Regular);
    target_header.set_mode(0o644);
    target_header.set_size(5);
    target_header.set_cksum();
    tar_builder
        .append_data(&mut target_header, "target.txt", b"data\n".as_slice())
        .unwrap();

    for name in &link_names {
        let mut link_header = tar::Header::new_gnu();
        link_header.set_entry_type(tar::EntryType::Link);
        link_header.set_link_name("target.txt").unwrap();
        link_header.set_size(0);
        link_header.set_cksum();
        tar_builder
            .append_data(&mut link_header, name, std::io::empty())
            .unwrap();
    }

    let mut archive = TarArchive::new(Cursor::new(tar_builder.into_inner().unwrap()));
    let dest = TempDir::new().unwrap();
    let mut security = SecurityConfig::default();
    security.allowed.hardlinks = true;

    let report = archive
        .extract(dest.path(), &security, &ExtractionOptions::default())
        .unwrap();

    // One regular file plus eight hardlinks.
    assert_eq!(report.files_extracted, 9);
    for name in &link_names {
        assert!(dest.path().join(name).exists());
    }
}
/// Builds an in-memory tar archive containing two entries with the same
/// `path`: the first holds `content1`, the second holds `content2`.
fn create_duplicate_entry_tar(path: &str, content1: &[u8], content2: &[u8]) -> Vec<u8> {
    let mut tar_builder = tar::Builder::new(Vec::new());

    // Both entries share the same path and mode and differ only in payload.
    for body in [content1, content2] {
        let mut entry_header = tar::Header::new_gnu();
        entry_header.set_size(body.len() as u64);
        entry_header.set_mode(0o644);
        entry_header.set_cksum();
        tar_builder
            .append_data(&mut entry_header, path, body)
            .unwrap();
    }

    tar_builder.into_inner().unwrap()
}
/// With default options a second entry for an already-extracted path is
/// skipped: the first content is kept and exactly one warning naming the
/// path is reported.
#[test]
fn test_duplicate_entry_skip_default() {
    let dest = TempDir::new().unwrap();
    let mut archive = TarArchive::new(Cursor::new(create_duplicate_entry_tar(
        "legit.txt",
        b"first",
        b"second",
    )));

    let report = archive
        .extract(
            dest.path(),
            &SecurityConfig::default(),
            &ExtractionOptions::default(),
        )
        .unwrap();

    // One entry written, the duplicate skipped with a single warning.
    assert_eq!(report.files_extracted, 1);
    assert_eq!(report.files_skipped, 1);
    assert_eq!(report.warnings.len(), 1);
    assert!(report.warnings[0].contains("legit.txt"));

    // The file on disk still holds the first entry's payload.
    let on_disk = std::fs::read(dest.path().join("legit.txt")).unwrap();
    assert_eq!(on_disk, b"first");
}
/// With `skip_duplicates` turned off, an archive containing the same path
/// twice must make extraction fail.
#[test]
fn test_duplicate_entry_error_when_disabled() {
    let dest = TempDir::new().unwrap();
    let mut archive = TarArchive::new(Cursor::new(create_duplicate_entry_tar(
        "legit.txt",
        b"first",
        b"second",
    )));

    // Duplicates are no longer tolerated; atomic mode is off as well.
    let strict_options = ExtractionOptions {
        skip_duplicates: false,
        atomic: false,
    };

    let outcome = archive.extract(dest.path(), &SecurityConfig::default(), &strict_options);
    assert!(outcome.is_err());
}
}