use std::cell::RefCell;
use std::io::ErrorKind;
use std::io::Read;
use std::io::Seek;
use std::path::Path;
use std::path::PathBuf;
use std::process;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use sevenz_rust2::Archive;
use sevenz_rust2::Password;
// Process-wide counter that is incremented for every temporary file created
// during extraction; together with the process id it keeps temp names distinct.
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
use crate::ExtractionError;
use crate::ExtractionOptions;
use crate::ExtractionReport;
use crate::Result;
use crate::SecurityConfig;
use crate::error::QuotaResource;
use crate::security::EntryValidator;
use crate::security::validator::ValidatedEntryType;
use crate::types::DestDir;
use crate::types::EntryType;
use super::common;
use super::traits::ArchiveFormat;
/// Scope guard that deletes a temporary file when dropped, unless the file
/// has been explicitly kept via [`TempFileGuard::persist`].
struct TempFileGuard {
    /// Location of the temporary file this guard is responsible for.
    path: PathBuf,
    /// `true` while the guard should still delete `path` on drop.
    armed: bool,
}

impl TempFileGuard {
    /// Creates an armed guard for `path`; dropping it removes the file.
    fn new(path: PathBuf) -> Self {
        Self { path, armed: true }
    }

    /// Consumes the guard and leaves the file in place.
    fn persist(mut self) {
        // `self` is dropped right after this assignment; disarming first
        // turns that drop into a no-op.
        self.armed = false;
    }
}

impl Drop for TempFileGuard {
    fn drop(&mut self) {
        if !self.armed {
            return;
        }
        // Best-effort cleanup: a failure to remove the file is deliberately ignored.
        let _ = std::fs::remove_file(&self.path);
    }
}
/// Per-entry metadata copied out of the archive header when the archive is
/// opened, so entries can be validated before any data is decompressed.
#[derive(Debug, Clone)]
struct CachedEntry {
/// Entry name exactly as stored in the archive.
name: String,
/// Size reported for the entry; summed as the uncompressed total when
/// checking solid archives against the configured memory limit.
size: u64,
/// Whether the archive marks this entry as a directory.
is_directory: bool,
}
/// A 7z archive opened from a seekable reader, with its entry list cached
/// up front.
#[derive(Debug)]
pub struct SevenZArchive<R: Read + Seek> {
/// Underlying reader the archive is decompressed from.
source: R,
/// Metadata for every entry found when the archive was opened.
entries: Vec<CachedEntry>,
/// Solid flag as reported by the archive header (`false` for empty archives).
is_solid: bool,
}
impl<R: Read + Seek> SevenZArchive<R> {
    /// Opens a 7z archive from `source` and caches its entry metadata.
    ///
    /// The archive is read with an empty password. On success the reader is
    /// rewound to the start so that a later extraction begins from offset 0.
    ///
    /// # Errors
    ///
    /// * [`ExtractionError::SecurityViolation`] when the open error mentions
    ///   encryption or a password.
    /// * [`ExtractionError::InvalidArchive`] when the archive cannot be
    ///   parsed and is not recognised as an empty archive.
    /// * [`ExtractionError::Io`] when rewinding the reader fails.
    pub fn new(mut source: R) -> Result<Self> {
        let password = Password::empty();
        let archive = match Archive::read(&mut source, &password) {
            Ok(a) => a,
            Err(e) => {
                let err_str = e.to_string().to_lowercase();
                if err_str.contains("encrypt") || err_str.contains("password") {
                    return Err(ExtractionError::SecurityViolation {
                        reason: "encrypted 7z archive detected. Password-protected archives are not supported. \
                                 Decrypt the archive externally and try again."
                            .into(),
                    });
                }
                if is_empty_sevenz_archive(&e, &mut source) {
                    // The emptiness probe seeks and reads the magic bytes;
                    // rewind so this path hands back the reader in the same
                    // state as the regular path below.
                    source.rewind().map_err(ExtractionError::Io)?;
                    return Ok(Self {
                        source,
                        entries: Vec::new(),
                        is_solid: false,
                    });
                }
                return Err(ExtractionError::InvalidArchive(format!(
                    "failed to open 7z archive: {e}"
                )));
            }
        };

        let is_solid = archive.is_solid;
        let entries: Vec<CachedEntry> = archive
            .files
            .iter()
            .map(|e| CachedEntry {
                name: e.name.clone(),
                size: e.size,
                is_directory: e.is_directory(),
            })
            .collect();

        // Reading the header moved the cursor; extraction re-reads from the start.
        source.rewind().map_err(ExtractionError::Io)?;
        Ok(Self {
            source,
            entries,
            is_solid,
        })
    }
}
impl<R: Read + Seek> SevenZArchive<R> {
fn extract_with_callback(
source: &mut R,
dest: &DestDir,
validator: &mut EntryValidator,
dir_cache: &mut common::DirCache,
skip_duplicates: bool,
) -> Result<ExtractionReport> {
let report = RefCell::new(ExtractionReport::new());
let dir_cache = RefCell::new(dir_cache);
let extract_fn = |entry: &sevenz_rust2::ArchiveEntry,
reader: &mut dyn Read,
_dest_dir: &PathBuf|
-> std::result::Result<bool, sevenz_rust2::Error> {
let path = PathBuf::from(&entry.name);
let entry_type = SevenZEntryAdapter::to_entry_type(entry).map_err(|e| {
sevenz_rust2::Error::Other(format!("entry type detection failed: {e}").into())
})?;
let validated = validator
.validate_entry(&path, &entry_type, entry.size, None, None, None)
.map_err(|e| {
sevenz_rust2::Error::Other(format!("validation failed: {e}").into())
})?;
match validated.entry_type {
ValidatedEntryType::Directory => {
let dest_path = dest.join_path(validated.safe_path.as_path());
dir_cache.borrow_mut().ensure_dir(&dest_path)?;
report.borrow_mut().directories_created += 1;
}
ValidatedEntryType::File => {
let dest_path = dest.join_path(validated.safe_path.as_path());
dir_cache.borrow_mut().ensure_parent_dir(&dest_path)?;
if dest_path.exists() {
if skip_duplicates {
report.borrow_mut().files_skipped += 1;
report.borrow_mut().warnings.push(format!(
"skipped duplicate entry: {}",
validated.safe_path.as_path().display()
));
return Ok(true);
}
return Err(sevenz_rust2::Error::Other(
format!(
"duplicate entry: {}",
validated.safe_path.as_path().display()
)
.into(),
));
}
let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
let pid = process::id();
let original_name = dest_path
.file_name()
.map_or_else(|| "file".to_string(), |n| n.to_string_lossy().to_string());
let temp_name = format!(".{original_name}.exarch-tmp-{pid}-{counter}");
let temp_path = dest_path.with_file_name(&temp_name);
let guard = TempFileGuard::new(temp_path.clone());
{
let mut temp_file = std::fs::File::create(&temp_path)?;
let bytes_written = std::io::copy(reader, &mut temp_file)?;
report.borrow_mut().bytes_written += bytes_written;
}
std::fs::rename(&temp_path, &dest_path)?;
guard.persist();
report.borrow_mut().files_extracted += 1;
}
_ => {
return Err(sevenz_rust2::Error::Other(
"symlinks/hardlinks not supported".into(),
));
}
}
Ok(true) };
sevenz_rust2::decompress_with_extract_fn(source, dest.as_path(), extract_fn)?;
Ok(report.into_inner())
}
}
impl<R: Read + Seek> ArchiveFormat for SevenZArchive<R> {
fn extract(
&mut self,
output_dir: &Path,
config: &SecurityConfig,
options: &ExtractionOptions,
) -> Result<ExtractionReport> {
if self.is_solid {
if !config.allow_solid_archives {
return Err(ExtractionError::SecurityViolation {
reason: "solid 7z archives are not allowed (enable allow_solid_archives)"
.into(),
});
}
let total_uncompressed: u64 = self
.entries
.iter()
.try_fold(0u64, |acc, e| acc.checked_add(e.size))
.ok_or(ExtractionError::QuotaExceeded {
resource: QuotaResource::TotalSize {
current: u64::MAX,
max: config.max_solid_block_memory,
},
})?;
if total_uncompressed > config.max_solid_block_memory {
return Err(ExtractionError::QuotaExceeded {
resource: QuotaResource::TotalSize {
current: total_uncompressed,
max: config.max_solid_block_memory,
},
});
}
}
let dest = DestDir::new_or_create(output_dir.to_path_buf())?;
let mut prevalidator = EntryValidator::new(config, &dest);
for entry in &self.entries {
let path = Path::new(&entry.name);
let entry_type = if entry.is_directory {
EntryType::Directory
} else {
EntryType::File
};
let validated =
prevalidator.validate_entry(path, &entry_type, entry.size, None, None, None)?;
match validated.entry_type {
ValidatedEntryType::File | ValidatedEntryType::Directory => {
}
_ => {
return Err(ExtractionError::SecurityViolation {
reason: "symlinks/hardlinks not yet supported for 7z".into(),
});
}
}
}
if self.entries.is_empty() {
return Ok(ExtractionReport::new());
}
let mut validator = EntryValidator::new(config, &dest);
let mut dir_cache = common::DirCache::new();
match Self::extract_with_callback(
&mut self.source,
&dest,
&mut validator,
&mut dir_cache,
options.skip_duplicates,
) {
Ok(report) => Ok(report),
Err(e) => {
Err(ExtractionError::PartialExtraction {
source: Box::new(e),
report: ExtractionReport::new(),
})
}
}
}
fn format_name(&self) -> &'static str {
"7z"
}
}
/// Namespace for translating `sevenz_rust2` entries into this crate's entry types.
struct SevenZEntryAdapter;

impl SevenZEntryAdapter {
    /// Classifies `entry` as a file or a directory.
    ///
    /// # Errors
    ///
    /// Returns [`ExtractionError::SecurityViolation`] when the entry carries
    /// the Windows reparse-point attribute.
    fn to_entry_type(entry: &sevenz_rust2::ArchiveEntry) -> Result<EntryType> {
        if !Self::is_windows_reparse_point(entry) {
            let kind = if entry.is_directory() {
                EntryType::Directory
            } else {
                EntryType::File
            };
            return Ok(kind);
        }

        Err(ExtractionError::SecurityViolation {
            reason: format!(
                "symlink detected in 7z archive: {} \
                 (Windows reparse point attribute set). \
                 7z symlink extraction is not supported due to sevenz-rust2 API limitations.",
                entry.name
            ),
        })
    }

    /// Returns `true` when `entry` has Windows attributes and the
    /// reparse-point bit (`0x400`) is set among them.
    fn is_windows_reparse_point(entry: &sevenz_rust2::ArchiveEntry) -> bool {
        const FILE_ATTRIBUTE_REPARSE_POINT: u32 = 0x0000_0400;

        if !entry.has_windows_attributes {
            return false;
        }
        let reparse_bits = entry.windows_attributes & FILE_ATTRIBUTE_REPARSE_POINT;
        reparse_bits != 0
    }
}
fn is_empty_sevenz_archive<R: Read + Seek>(err: &sevenz_rust2::Error, source: &mut R) -> bool {
const SEVENZ_MAGIC: [u8; 6] = [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C];
const EMPTY_ARCHIVE_SIZE: u64 = 32;
let is_eof = matches!(err, sevenz_rust2::Error::Io(io_err, _) if io_err.kind() == ErrorKind::UnexpectedEof);
if !is_eof {
return false;
}
let Ok(size) = source.seek(std::io::SeekFrom::End(0)) else {
return false;
};
if size != EMPTY_ARCHIVE_SIZE {
return false;
}
let Ok(_) = source.seek(std::io::SeekFrom::Start(0)) else {
return false;
};
let mut magic = [0u8; 6];
source.read_exact(&mut magic).is_ok() && magic == SEVENZ_MAGIC
}
impl From<sevenz_rust2::Error> for ExtractionError {
fn from(err: sevenz_rust2::Error) -> Self {
let err_str = err.to_string();
let err_lower = err_str.to_lowercase();
if err_lower.contains("password") || err_lower.contains("encrypt") {
return Self::SecurityViolation {
reason: format!("encrypted archive: {err_str}"),
};
}
if err_lower.contains("i/o") || err_lower.contains("read") || err_lower.contains("write") {
return Self::Io(std::io::Error::other(err_str));
}
Self::InvalidArchive(format!("7z error: {err_str}"))
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use tempfile::TempDir;

    /// Six-byte signature expected at the start of every 7z fixture.
    const SEVENZ_MAGIC: [u8; 6] = [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C];

    /// Resolves fixture `name` under `tests/fixtures`, two directory levels
    /// above this crate's `CARGO_MANIFEST_DIR`, independent of the current
    /// working directory.
    fn fixture_path(name: &str) -> std::path::PathBuf {
        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .unwrap()
            .parent()
            .unwrap()
            .join("tests/fixtures")
            .join(name)
    }

    /// Reads fixture `name` into memory, panicking with a hint on failure.
    fn load_fixture(name: &str) -> Vec<u8> {
        std::fs::read(fixture_path(name)).unwrap_or_else(|e| {
            panic!(
                "Failed to load fixture {name}. Run tests/fixtures/generate_7z_fixtures.sh first. Error: {e}"
            )
        })
    }

    /// Opens fixture `name` as an in-memory archive, panicking if it cannot be opened.
    fn open_fixture(name: &str) -> SevenZArchive<Cursor<Vec<u8>>> {
        SevenZArchive::new(Cursor::new(load_fixture(name))).unwrap()
    }

    /// Default configuration with solid archives enabled and the given memory limit.
    fn solid_config(max_solid_block_memory: u64) -> SecurityConfig {
        SecurityConfig {
            allow_solid_archives: true,
            max_solid_block_memory,
            ..SecurityConfig::default()
        }
    }

    /// Builds a file entry that carries the given Windows attribute bits.
    fn file_entry_with_attributes(name: &str, attributes: u32) -> sevenz_rust2::ArchiveEntry {
        let mut entry = sevenz_rust2::ArchiveEntry::new_file(name);
        entry.has_windows_attributes = true;
        entry.windows_attributes = attributes;
        entry
    }

    #[test]
    fn test_format_name() {
        let archive = open_fixture("simple.7z");
        assert_eq!(archive.format_name(), "7z");
    }

    /// Data consisting of nothing but the magic bytes is not a valid archive.
    #[test]
    fn test_truncated_archive_rejected() {
        let result = SevenZArchive::new(Cursor::new(SEVENZ_MAGIC.to_vec()));
        assert!(result.is_err(), "invalid archive should fail to parse");
        assert!(matches!(result, Err(ExtractionError::InvalidArchive(_))));
    }

    #[test]
    fn test_invalid_magic_rejected() {
        let data = vec![0x00, 0x01, 0x02, 0x03, 0x04, 0x05];
        let result = SevenZArchive::new(Cursor::new(data));
        assert!(result.is_err());
        assert!(matches!(result, Err(ExtractionError::InvalidArchive(_))));
    }

    #[test]
    fn test_load_fixture_helper() {
        let data = load_fixture("simple.7z");
        assert!(!data.is_empty());
        assert_eq!(&data[0..6], &SEVENZ_MAGIC);
    }

    #[test]
    fn test_extract_simple_file() {
        let mut archive = open_fixture("simple.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let report = archive
            .extract(temp.path(), &config, &ExtractionOptions::default())
            .unwrap();

        assert_eq!(report.files_extracted, 2);
        assert!(temp.path().join("simple/file1.txt").exists());
        assert!(temp.path().join("simple/file2.txt").exists());
        let content1 = std::fs::read_to_string(temp.path().join("simple/file1.txt")).unwrap();
        assert_eq!(content1, "hello world\n");
    }

    #[test]
    fn test_extract_nested_directories() {
        let mut archive = open_fixture("nested-dirs.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let report = archive
            .extract(temp.path(), &config, &ExtractionOptions::default())
            .unwrap();

        assert!(report.files_extracted >= 1);
        assert!(temp.path().join("nested/subdir1/subdir2/deep.txt").exists());
        assert!(temp.path().join("nested/subdir1/file.txt").exists());
    }

    #[test]
    fn test_solid_archive_rejected() {
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();

        let result = archive.extract(
            temp.path(),
            &SecurityConfig::default(),
            &ExtractionOptions::default(),
        );

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SecurityViolation { .. }
        ));
    }

    #[test]
    fn test_encrypted_archive_rejected() {
        let result = SevenZArchive::new(Cursor::new(load_fixture("encrypted.7z")));
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SecurityViolation { .. }
        ));
    }

    #[test]
    fn test_empty_archive() {
        let mut archive = open_fixture("empty.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let report = archive
            .extract(temp.path(), &config, &ExtractionOptions::default())
            .unwrap();

        assert_eq!(report.files_extracted, 0);
        assert_eq!(report.directories_created, 0);
    }

    /// Same as `test_empty_archive`, but reading from a real file handle
    /// instead of an in-memory cursor.
    #[test]
    fn test_empty_archive_extract() {
        let file = std::fs::File::open(fixture_path("empty.7z")).unwrap();
        let mut archive = SevenZArchive::new(file).unwrap();
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let report = archive
            .extract(temp.path(), &config, &ExtractionOptions::default())
            .unwrap();

        assert_eq!(report.files_extracted, 0);
        assert_eq!(report.bytes_written, 0);
    }

    #[test]
    fn test_quota_exceeded() {
        let mut archive = open_fixture("large-file.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig {
            max_file_size: 1024,
            ..SecurityConfig::default()
        };

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::QuotaExceeded { .. }
        ));
    }

    #[test]
    fn test_multiple_files_quota_not_double_counted() {
        let mut archive = open_fixture("simple.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig {
            max_file_count: 3,
            ..SecurityConfig::default()
        };

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(
            result.is_ok(),
            "2 files should not exceed quota of 3: {result:?}"
        );
        assert_eq!(result.unwrap().files_extracted, 2);
    }

    // NOTE(review): despite its name this test only checks that a benign
    // fixture opens; it does not exercise a path-traversal payload.
    #[test]
    fn test_path_traversal_integration() {
        let archive = SevenZArchive::new(Cursor::new(load_fixture("simple.7z")));
        assert!(archive.is_ok());
    }

    #[test]
    fn test_solid_archive_allowed_with_config() {
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();
        let config = solid_config(100 * 1024 * 1024);

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_ok(), "solid archive should extract: {result:?}");
        assert!(result.unwrap().files_extracted > 0);
    }

    #[test]
    fn test_solid_archive_rejected_by_default() {
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SecurityViolation { .. }
        ));
    }

    #[test]
    fn test_solid_archive_memory_limit_exceeded() {
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();
        let config = solid_config(1);

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::QuotaExceeded { .. }
        ));
    }

    #[test]
    fn test_non_solid_archive_unaffected_by_solid_config() {
        let mut archive = open_fixture("simple.7z");
        let temp = TempDir::new().unwrap();
        let config = SecurityConfig::default();

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_ok(), "non-solid should work: {result:?}");
    }

    #[test]
    fn test_is_solid_flag_detected_correctly() {
        let solid_archive = open_fixture("solid.7z");
        assert!(solid_archive.is_solid, "solid.7z should have is_solid=true");

        let non_solid_archive = open_fixture("simple.7z");
        assert!(
            !non_solid_archive.is_solid,
            "simple.7z should have is_solid=false"
        );
    }

    #[test]
    fn test_solid_archive_memory_limit_exact_boundary() {
        let total_size: u64 = open_fixture("solid.7z")
            .entries
            .iter()
            .map(|e| e.size)
            .sum();
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();
        let config = solid_config(total_size);

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(
            result.is_ok(),
            "exact limit should allow extraction: {result:?}"
        );
    }

    #[test]
    fn test_solid_archive_memory_limit_one_under_boundary() {
        let total_size: u64 = open_fixture("solid.7z")
            .entries
            .iter()
            .map(|e| e.size)
            .sum();
        // A fixture smaller than two bytes is skipped by this check.
        if total_size < 2 {
            return;
        }
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();
        let config = solid_config(total_size - 1);

        let result = archive.extract(temp.path(), &config, &ExtractionOptions::default());

        assert!(result.is_err(), "one byte under limit should reject");
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::QuotaExceeded { .. }
        ));
    }

    #[test]
    fn test_solid_archive_rejected_error_message() {
        let mut archive = open_fixture("solid.7z");
        let temp = TempDir::new().unwrap();

        let result = archive.extract(
            temp.path(),
            &SecurityConfig::default(),
            &ExtractionOptions::default(),
        );

        assert!(result.is_err());
        match result.unwrap_err() {
            ExtractionError::SecurityViolation { reason } => {
                assert!(
                    reason.contains("solid") && reason.contains("allow_solid_archives"),
                    "error should mention 'solid' and 'allow_solid_archives', got: {reason}"
                );
            }
            other => panic!("expected SecurityViolation, got {other:?}"),
        }
    }

    #[test]
    fn test_windows_reparse_point_detected() {
        let entry = file_entry_with_attributes("symlink.txt", 0x0400);

        assert!(
            SevenZEntryAdapter::is_windows_reparse_point(&entry),
            "reparse point attribute should be detected"
        );
        let result = SevenZEntryAdapter::to_entry_type(&entry);
        assert!(result.is_err(), "should return error for reparse point");
        assert!(
            matches!(
                result.unwrap_err(),
                ExtractionError::SecurityViolation { .. }
            ),
            "should be SecurityViolation error"
        );
    }

    #[test]
    fn test_windows_reparse_point_not_set() {
        let entry = file_entry_with_attributes("file.txt", 0x0080);

        assert!(
            !SevenZEntryAdapter::is_windows_reparse_point(&entry),
            "normal file should not be detected as reparse point"
        );
        let result = SevenZEntryAdapter::to_entry_type(&entry);
        assert!(result.is_ok(), "normal file should succeed");
        assert_eq!(result.unwrap(), EntryType::File);
    }

    #[test]
    fn test_no_windows_attributes() {
        let mut entry = sevenz_rust2::ArchiveEntry::new_file("file.txt");
        entry.has_windows_attributes = false;
        entry.windows_attributes = 0;

        assert!(
            !SevenZEntryAdapter::is_windows_reparse_point(&entry),
            "entry without Windows attributes should not be detected as reparse point"
        );
        let result = SevenZEntryAdapter::to_entry_type(&entry);
        assert!(result.is_ok(), "file without attributes should succeed");
        assert_eq!(result.unwrap(), EntryType::File);
    }

    #[test]
    fn test_windows_reparse_point_with_other_attributes() {
        let entry = file_entry_with_attributes("symlink.txt", 0x0400 | 0x0020);

        assert!(
            SevenZEntryAdapter::is_windows_reparse_point(&entry),
            "reparse point should be detected even with other attributes"
        );
        let result = SevenZEntryAdapter::to_entry_type(&entry);
        assert!(result.is_err(), "should return error for reparse point");
    }

    #[test]
    fn test_directory_junction_reparse_point_rejected() {
        let mut entry = sevenz_rust2::ArchiveEntry::new_directory("dir/");
        entry.has_windows_attributes = true;
        entry.windows_attributes = 0x0400;

        let result = SevenZEntryAdapter::to_entry_type(&entry);

        assert!(result.is_err(), "directory junction should be rejected");
        assert!(matches!(
            result.unwrap_err(),
            ExtractionError::SecurityViolation { .. }
        ));
    }

    #[test]
    fn test_windows_reparse_point_error_message() {
        let entry = file_entry_with_attributes("link.txt", 0x0400);

        let result = SevenZEntryAdapter::to_entry_type(&entry);

        assert!(result.is_err());
        match result.unwrap_err() {
            ExtractionError::SecurityViolation { reason } => {
                assert!(
                    reason.contains("symlink") && reason.contains("link.txt"),
                    "error should mention 'symlink' and entry name, got: {reason}"
                );
                assert!(
                    reason.contains("sevenz-rust2"),
                    "error should mention library limitation, got: {reason}"
                );
            }
            other => panic!("expected SecurityViolation, got {other:?}"),
        }
    }
}