use rustc_hash::FxHashSet;
use std::fs::File;
use std::fs::create_dir_all;
use std::io::BufWriter;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use crate::ExtractionError;
use crate::ExtractionReport;
use crate::Result;
use crate::copy::CopyBuffer;
use crate::copy::copy_with_buffer;
use crate::error::QuotaResource;
use crate::security::validator::ValidatedEntry;
use crate::types::DestDir;
use crate::types::SafeSymlink;
/// Remembers which directories have already been created so that repeated
/// requests for the same directory do not hit the filesystem again.
///
/// Paths are recorded only after a successful `create_dir_all`, together with
/// each of their non-empty ancestors.
#[derive(Debug)]
pub struct DirCache {
/// Directories (and their ancestors) that were created through this cache.
created: FxHashSet<PathBuf>,
}
impl DirCache {
    /// Creates an empty cache with an initial capacity of 128 entries.
    #[must_use]
    #[inline]
    pub fn new() -> Self {
        Self::with_capacity(128)
    }

    /// Creates an empty cache pre-sized to hold `capacity` directory paths.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        use rustc_hash::FxBuildHasher;
        Self {
            created: FxHashSet::with_capacity_and_hasher(capacity, FxBuildHasher),
        }
    }

    /// Records `path` and each of its non-empty ancestors as created.
    ///
    /// This is the only place entries are inserted, so any cached path already
    /// has all of its ancestors cached. The walk can therefore stop at the
    /// first ancestor that is already present instead of re-allocating and
    /// re-inserting every component up to the root for each new directory.
    fn cache_ancestors(&mut self, path: &Path) {
        for ancestor in path.ancestors() {
            // An empty component marks the end of a relative path; a cached
            // component means everything above it is cached as well.
            if ancestor.as_os_str().is_empty() || self.created.contains(ancestor) {
                break;
            }
            self.created.insert(ancestor.to_path_buf());
        }
    }

    /// Returns `true` if `path` has been recorded as created by this cache.
    #[inline]
    pub fn contains(&self, path: &Path) -> bool {
        self.created.contains(path)
    }

    /// Ensures that the parent directory of `file_path` exists.
    ///
    /// Returns `Ok(true)` when `create_dir_all` was invoked for the parent on
    /// this call, and `Ok(false)` when the path has no parent, the parent is
    /// empty (a bare file name), or the parent is already cached.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error from `create_dir_all`; nothing is cached in
    /// that case.
    #[inline]
    pub fn ensure_parent_dir(&mut self, file_path: &Path) -> std::io::Result<bool> {
        match file_path.parent() {
            // `ensure_dir` already handles the empty-path and cached cases.
            Some(parent) => self.ensure_dir(parent),
            None => Ok(false),
        }
    }

    /// Ensures that `dir_path` exists.
    ///
    /// Returns `Ok(true)` when `create_dir_all` was invoked on this call (even
    /// if the directory was already present on disk), and `Ok(false)` when
    /// `dir_path` is empty or already cached.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error from `create_dir_all`; nothing is cached in
    /// that case.
    #[inline]
    pub fn ensure_dir(&mut self, dir_path: &Path) -> std::io::Result<bool> {
        if dir_path.as_os_str().is_empty() || self.created.contains(dir_path) {
            return Ok(false);
        }
        create_dir_all(dir_path)?;
        self.cache_ancestors(dir_path);
        Ok(true)
    }
}
/// `Default` yields the same cache as [`DirCache::new`].
impl Default for DirCache {
/// Delegates to [`DirCache::new`].
fn default() -> Self {
Self::new()
}
}
/// Creates (or truncates) the file at `path` for writing and, when `mode` is
/// given, sets its permission bits to exactly `mode`.
///
/// `OpenOptions::mode` only applies to newly created files and is filtered by
/// the process umask, so the permissions are applied a second time once the
/// file is open. That second step goes through the open handle rather than the
/// path, so it always affects the file that was actually opened, even if the
/// path is replaced (for example by a symlink) between the two calls.
///
/// With `mode == None` the file is created with the platform defaults.
///
/// # Errors
///
/// Returns any I/O error raised while opening the file or changing its
/// permissions.
#[inline]
#[cfg(unix)]
fn create_file_with_mode(path: &Path, mode: Option<u32>) -> std::io::Result<File> {
    use std::fs::OpenOptions;
    use std::fs::Permissions;
    use std::os::unix::fs::OpenOptionsExt;
    use std::os::unix::fs::PermissionsExt;

    let mut opts = OpenOptions::new();
    opts.write(true).create(true).truncate(true);
    if let Some(m) = mode {
        opts.mode(m);
    }

    let file = opts.open(path)?;
    if let Some(m) = mode {
        // Handle-based permission change: immune to the path being swapped
        // after `open` and not subject to the umask.
        file.set_permissions(Permissions::from_mode(m))?;
    }
    Ok(file)
}
/// Creates (or truncates) the file at `path` for writing.
///
/// `_mode` is accepted so the signature matches the Unix variant of this
/// function; it is ignored on this platform.
///
/// # Errors
///
/// Returns any I/O error raised while opening the file.
#[inline]
#[cfg(not(unix))]
fn create_file_with_mode(path: &Path, _mode: Option<u32>) -> std::io::Result<File> {
    use std::fs::OpenOptions;

    // Write-only, create if missing, truncate if present.
    OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(path)
}
/// Extracts one regular-file entry from `reader` into the destination
/// directory.
///
/// The steps run in this order:
/// 1. Resolve the output path under `dest` and make sure its parent directory
///    exists (via `dir_cache`).
/// 2. If anything already occupies the output path, either skip the entry
///    (recording a warning and bumping `report.files_skipped`) when
///    `skip_duplicates` is set, or fail.
/// 3. When `expected_size` is known, check up front that adding it to
///    `report.bytes_written` cannot overflow, before any file is created.
/// 4. Create the file with the entry's mode, copy the data through a 64 KiB
///    buffered writer, and flush it.
/// 5. Update `report.files_extracted` and `report.bytes_written` with the
///    number of bytes actually copied.
///
/// # Errors
///
/// - `ExtractionError::InvalidArchive` when the output path is already
///   occupied and `skip_duplicates` is `false`.
/// - `ExtractionError::QuotaExceeded` with `QuotaResource::IntegerOverflow`
///   when the byte counter would overflow.
/// - Any I/O error from directory creation, file creation, copying or
///   flushing, converted by `?`.
// The eight parameters are all part of the public signature.
#[allow(clippy::too_many_arguments)]
#[inline]
pub fn extract_file_generic<R: Read>(
    reader: &mut R,
    validated: &ValidatedEntry,
    dest: &DestDir,
    report: &mut ExtractionReport,
    expected_size: Option<u64>,
    copy_buffer: &mut CopyBuffer,
    dir_cache: &mut DirCache,
    skip_duplicates: bool,
) -> Result<()> {
    let output_path = dest.join(&validated.safe_path);
    dir_cache.ensure_parent_dir(&output_path)?;

    // `symlink_metadata` does not follow symlinks, so a dangling symlink at
    // the output path is detected as occupied. `Path::exists` would report it
    // as absent and the subsequent create+truncate open would then write
    // through the link to wherever it points.
    if output_path.symlink_metadata().is_ok() {
        if skip_duplicates {
            report.files_skipped += 1;
            report.warnings.push(format!(
                "skipped duplicate entry: {}",
                validated.safe_path.as_path().display()
            ));
            return Ok(());
        }
        return Err(ExtractionError::InvalidArchive(format!(
            "duplicate entry: {}",
            validated.safe_path.as_path().display()
        )));
    }

    // Fail before touching the filesystem if the declared size alone would
    // overflow the running byte counter; the sum itself is not stored here.
    if let Some(size) = expected_size {
        report
            .bytes_written
            .checked_add(size)
            .ok_or(ExtractionError::QuotaExceeded {
                resource: QuotaResource::IntegerOverflow,
            })?;
    }

    let output_file = create_file_with_mode(&output_path, validated.mode)?;
    let mut buffered_writer = BufWriter::with_capacity(64 * 1024, output_file);
    let bytes_written = copy_with_buffer(reader, &mut buffered_writer, copy_buffer)?;
    // Flush explicitly: dropping a `BufWriter` swallows flush errors.
    buffered_writer.flush()?;

    report.files_extracted += 1;
    report.bytes_written =
        report
            .bytes_written
            .checked_add(bytes_written)
            .ok_or(ExtractionError::QuotaExceeded {
                resource: QuotaResource::IntegerOverflow,
            })?;
    Ok(())
}
/// Creates the directory described by `validated` underneath `dest`.
///
/// The directory and any missing ancestors are created through `dir_cache`.
/// `report.directories_created` is incremented on every successful call,
/// whether or not the directory was already known to the cache.
///
/// # Errors
///
/// Propagates any error returned by `DirCache::ensure_dir`.
pub fn create_directory(
    validated: &ValidatedEntry,
    dest: &DestDir,
    report: &mut ExtractionReport,
    dir_cache: &mut DirCache,
) -> Result<()> {
    let target = dest.join(&validated.safe_path);

    // The "was it newly cached" flag is irrelevant here; only failure matters.
    let _newly_cached = dir_cache.ensure_dir(&target)?;

    report.directories_created += 1;
    Ok(())
}
/// Creates the symbolic link described by `safe_symlink` underneath `dest`.
///
/// On Unix the parent directory of the link is ensured through `dir_cache`.
/// If something already occupies the link path (including a dangling
/// symlink), the entry is skipped with a warning when `skip_duplicates` is
/// set and rejected otherwise. On success `report.symlinks_created` is
/// incremented.
///
/// On other platforms the function always fails.
///
/// # Errors
///
/// - `ExtractionError::InvalidArchive` when the link path is occupied and
///   `skip_duplicates` is `false`.
/// - `ExtractionError::SecurityViolation` on non-Unix platforms.
/// - Any I/O error from directory or symlink creation, converted by `?`.
// On non-Unix targets the body is an unconditional error, so none of the
// parameters are used there.
#[allow(unused_variables)]
pub fn create_symlink(
    safe_symlink: &SafeSymlink,
    dest: &DestDir,
    report: &mut ExtractionReport,
    dir_cache: &mut DirCache,
    skip_duplicates: bool,
) -> Result<()> {
    #[cfg(unix)]
    {
        let link_path = dest.join_path(safe_symlink.link_path());
        let target_path = safe_symlink.target_path();
        dir_cache.ensure_parent_dir(&link_path)?;

        // `exists` follows symlinks; `symlink_metadata` also catches a
        // dangling symlink sitting at the link path.
        let occupied = link_path.exists() || link_path.symlink_metadata().is_ok();

        match (occupied, skip_duplicates) {
            (true, true) => {
                report.files_skipped += 1;
                report.warnings.push(format!(
                    "skipped duplicate symlink: {}",
                    safe_symlink.link_path().display()
                ));
                Ok(())
            }
            (true, false) => Err(ExtractionError::InvalidArchive(format!(
                "duplicate entry: {}",
                safe_symlink.link_path().display()
            ))),
            (false, _) => {
                std::os::unix::fs::symlink(target_path, &link_path)?;
                report.symlinks_created += 1;
                Ok(())
            }
        }
    }
    #[cfg(not(unix))]
    {
        Err(ExtractionError::SecurityViolation {
            reason: "symlinks are not supported on this platform".into(),
        })
    }
}
// Unit tests for `DirCache`, `create_file_with_mode` and `extract_file_generic`.
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
use super::*;
use crate::ExtractionError;
use crate::ExtractionReport;
use crate::SecurityConfig;
use crate::copy::CopyBuffer;
use crate::security::validator::ValidatedEntry;
use crate::security::validator::ValidatedEntryType;
use crate::types::SafePath;
use std::io::Cursor;
use std::path::PathBuf;
use tempfile::TempDir;
// The byte counter is preset to `u64::MAX - 100` and the declared size is 200,
// so the up-front `checked_add` must fail with `IntegerOverflow`.
#[test]
fn test_extract_file_generic_integer_overflow_check() {
let temp = TempDir::new().expect("failed to create temp dir");
let dest = DestDir::new(temp.path().to_path_buf()).expect("failed to create dest");
let mut report = ExtractionReport::default();
let mut copy_buffer = CopyBuffer::new();
let mut dir_cache = DirCache::new();
report.bytes_written = u64::MAX - 100;
let expected_size = Some(200u64);
let config = SecurityConfig::default();
let validated = ValidatedEntry {
safe_path: SafePath::validate(&PathBuf::from("test.txt"), &dest, &config)
.expect("path should be valid"),
mode: Some(0o644),
entry_type: ValidatedEntryType::File,
};
let mut reader = Cursor::new(b"test data");
let result = extract_file_generic(
&mut reader,
&validated,
&dest,
&mut report,
expected_size,
&mut copy_buffer,
&mut dir_cache,
true,
);
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
ExtractionError::QuotaExceeded {
resource: QuotaResource::IntegerOverflow
}
));
}
// First `ensure_parent_dir` call creates `a/b/c` and returns true; the second
// call for the same file is served from the cache and returns false.
#[test]
fn test_dir_cache_basic() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let file_path = temp.path().join("a/b/c/file.txt");
let created = cache
.ensure_parent_dir(&file_path)
.expect("should create dir");
assert!(created, "first call should create directory");
assert!(temp.path().join("a/b/c").exists());
let created = cache
.ensure_parent_dir(&file_path)
.expect("should use cache");
assert!(!created, "second call should use cache");
}
// Creating `a/b/c/d` also caches its ancestors, so a later request for `a/b`
// returns false.
#[test]
fn test_dir_cache_nested_paths() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let file1 = temp.path().join("a/b/c/d/file1.txt");
cache.ensure_parent_dir(&file1).expect("should create");
assert!(temp.path().join("a/b/c/d").exists());
let file2 = temp.path().join("a/b/other.txt");
let created = cache.ensure_parent_dir(&file2).expect("should use cache");
assert!(!created, "ancestor should be cached");
}
// Same create-then-cached behavior, exercised through `ensure_dir`.
#[test]
fn test_dir_cache_ensure_dir() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let dir_path = temp.path().join("a/b/c");
let created = cache.ensure_dir(&dir_path).expect("should create dir");
assert!(created, "first call should create directory");
assert!(dir_path.exists());
let created = cache.ensure_dir(&dir_path).expect("should use cache");
assert!(!created, "second call should use cache");
}
// A bare file name has an empty parent; `ensure_parent_dir` returns false
// without creating anything.
#[test]
fn test_dir_cache_empty_parent() {
use std::path::PathBuf;
let mut cache = DirCache::new();
let file_path = PathBuf::from("file.txt");
let created = cache
.ensure_parent_dir(&file_path)
.expect("should handle empty parent");
assert!(!created, "file with no directory should return false");
}
// The parent here is the temp dir itself: it exists on disk but is not yet in
// the cache, so the first call still returns true.
#[test]
fn test_dir_cache_single_component() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let file_path = temp.path().join("file.txt");
let created = cache
.ensure_parent_dir(&file_path)
.expect("should handle single component");
assert!(created, "parent directory gets cached on first call");
let file_path2 = temp.path().join("file2.txt");
let created = cache
.ensure_parent_dir(&file_path2)
.expect("should use cache");
assert!(!created, "second call uses cached parent");
}
// The return value reflects "not previously cached", not "newly created on
// disk": a directory that already exists still yields true on first sight.
#[test]
fn test_dir_cache_preexisting_directory() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let dir_path = temp.path().join("existing/dir");
std::fs::create_dir_all(&dir_path).expect("should create dir");
let created = cache.ensure_dir(&dir_path).expect("should succeed");
assert!(created, "first call creates cache entry even if dir exists");
let created = cache.ensure_dir(&dir_path).expect("should succeed");
assert!(!created, "second call uses cache");
}
// Builds a path 100 directory levels deep and checks both creation and
// subsequent cache hit.
#[test]
fn test_dir_cache_deep_nesting() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let mut path = temp.path().to_path_buf();
for i in 0..100 {
path.push(format!("level{i}"));
}
path.push("file.txt");
let created = cache
.ensure_parent_dir(&path)
.expect("should create deep nesting");
assert!(created, "deep nesting should be created");
let parent = path.parent().expect("should have parent");
assert!(parent.exists(), "all levels should exist");
let created = cache.ensure_parent_dir(&path).expect("should use cache");
assert!(!created, "deep nesting should be cached");
}
// Only the first file in a directory triggers creation; files 2..=10 in the
// same directory hit the cache.
#[test]
fn test_dir_cache_multiple_files_same_dir() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let dir = temp.path().join("shared/directory");
let file1 = dir.join("file1.txt");
let created = cache.ensure_parent_dir(&file1).expect("should create dir");
assert!(created, "first file creates directory");
for i in 2..=10 {
let file = dir.join(format!("file{i}.txt"));
let created = cache.ensure_parent_dir(&file).expect("should use cache");
assert!(!created, "file {i} should use cached directory");
}
}
// Pre-sizing the cache must not add any entries.
#[test]
fn test_dir_cache_with_capacity() {
let cache = DirCache::with_capacity(1000);
assert_eq!(cache.created.len(), 0, "should start empty");
}
// `contains` is false before creation and true afterwards for the directory
// and for its ancestors `a/b` and `a`.
#[test]
fn test_dir_cache_contains() {
let temp = TempDir::new().expect("failed to create temp dir");
let mut cache = DirCache::new();
let dir_path = temp.path().join("a/b/c");
assert!(
!cache.contains(&dir_path),
"should not contain before creation"
);
cache.ensure_dir(&dir_path).expect("should create dir");
assert!(cache.contains(&dir_path), "should contain after creation");
assert!(
cache.contains(&temp.path().join("a/b")),
"ancestor should be cached"
);
assert!(
cache.contains(&temp.path().join("a")),
"ancestor should be cached"
);
}
// Unix only: a file created with mode 0o644 ends up with exactly those
// permission bits.
#[cfg(unix)]
#[test]
fn test_create_file_with_mode_0o644() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("test_0o644.txt");
let file = create_file_with_mode(&file_path, Some(0o644)).expect("should create file");
drop(file);
assert!(file_path.exists(), "file should exist");
let metadata = std::fs::metadata(&file_path).expect("should read metadata");
let mode = metadata.permissions().mode();
// Mask off the file-type bits; only the rwx bits are compared.
let permission_bits = mode & 0o777;
assert_eq!(
permission_bits, 0o644,
"file should have permissions 0o644, got 0o{permission_bits:o}"
);
}
// Unix only: same check for mode 0o755.
#[cfg(unix)]
#[test]
fn test_create_file_with_mode_0o755() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("test_0o755.txt");
let file = create_file_with_mode(&file_path, Some(0o755)).expect("should create file");
drop(file);
assert!(file_path.exists(), "file should exist");
let metadata = std::fs::metadata(&file_path).expect("should read metadata");
let mode = metadata.permissions().mode();
let permission_bits = mode & 0o777;
assert_eq!(
permission_bits, 0o755,
"file should have permissions 0o755, got 0o{permission_bits:o}"
);
}
// Unix only: same check for mode 0o600.
#[cfg(unix)]
#[test]
fn test_create_file_with_mode_0o600() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("test_0o600.txt");
let file = create_file_with_mode(&file_path, Some(0o600)).expect("should create file");
drop(file);
assert!(file_path.exists(), "file should exist");
let metadata = std::fs::metadata(&file_path).expect("should read metadata");
let mode = metadata.permissions().mode();
let permission_bits = mode & 0o777;
assert_eq!(
permission_bits, 0o600,
"file should have permissions 0o600, got 0o{permission_bits:o}"
);
}
// All platforms: with no mode supplied the file is still created.
#[test]
fn test_create_file_with_mode_none() {
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("test_none.txt");
let file = create_file_with_mode(&file_path, None).expect("should create file");
drop(file);
assert!(file_path.exists(), "file should exist");
}
// Unix only: with no mode supplied the file gets some non-zero permission
// bits (the exact value is not asserted).
#[cfg(unix)]
#[test]
fn test_create_file_with_mode_none_unix() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("test_none_unix.txt");
let file = create_file_with_mode(&file_path, None).expect("should create file");
drop(file);
assert!(file_path.exists(), "file should exist");
let metadata = std::fs::metadata(&file_path).expect("should read metadata");
let mode = metadata.permissions().mode();
let permission_bits = mode & 0o777;
assert_ne!(
permission_bits, 0,
"file should have non-zero permissions with mode=None"
);
}
// Unix only: end-to-end extraction must leave the file with exactly the mode
// carried by the validated entry (0o775). The umask is whatever the test
// process inherited; this test does not change it.
#[cfg(unix)]
#[test]
fn test_extract_file_permissions_bypass_umask() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let dest = DestDir::new(temp.path().to_path_buf()).expect("failed to create dest");
let mut report = ExtractionReport::default();
let mut copy_buffer = CopyBuffer::new();
let mut dir_cache = DirCache::new();
let config = SecurityConfig::default();
let sanitized_mode = 0o775u32;
let validated = ValidatedEntry {
safe_path: SafePath::validate(&PathBuf::from("perm_test.txt"), &dest, &config)
.expect("path should be valid"),
mode: Some(sanitized_mode),
entry_type: ValidatedEntryType::File,
};
let mut reader = Cursor::new(b"content");
extract_file_generic(
&mut reader,
&validated,
&dest,
&mut report,
None,
&mut copy_buffer,
&mut dir_cache,
true,
)
.expect("extraction should succeed");
let extracted = temp.path().join("perm_test.txt");
assert!(extracted.exists(), "file should exist");
let metadata = std::fs::metadata(&extracted).expect("should read metadata");
let permission_bits = metadata.permissions().mode() & 0o777;
assert_eq!(
permission_bits, 0o775,
"extracted file must have exact sanitized mode 0o775, got 0o{permission_bits:o}; \
umask may have incorrectly reduced permissions"
);
}
// Unix only: forces a strict umask (0o077) around the call and checks that
// the file still ends up with 0o755.
// NOTE(review): the umask is process-wide, so tests running concurrently in
// the same process may briefly observe 0o077 — confirm this is acceptable.
#[cfg(unix)]
#[test]
#[allow(unsafe_code)]
fn test_create_file_with_mode_bypasses_strict_umask() {
use std::os::unix::fs::PermissionsExt;
let temp = TempDir::new().expect("failed to create temp dir");
let file_path = temp.path().join("strict_umask_test.txt");
// SAFETY: `umask` only swaps the process file-mode creation mask and returns
// the previous value; the previous mask is restored right after the call.
let previous_umask = unsafe { libc::umask(0o077) };
let result = create_file_with_mode(&file_path, Some(0o755));
// SAFETY: restores the mask saved above.
unsafe { libc::umask(previous_umask) };
let file = result.expect("should create file under strict umask");
drop(file);
let metadata = std::fs::metadata(&file_path).expect("should read metadata");
let permission_bits = metadata.permissions().mode() & 0o777;
assert_eq!(
permission_bits, 0o755,
"file must have exact mode 0o755 despite strict umask 0o077; \
got 0o{permission_bits:o} — set_permissions bypass not working"
);
}
}