pub mod gzip;
#[cfg(feature = "rar")]
pub mod rar;
pub mod seven_zip;
pub mod tar;
pub mod targz;
pub mod tarxz;
pub mod tarzst;
pub mod xz;
pub mod zip;
pub mod zstd;
use std::io::{Read, Write};
use std::path::Path;
use crate::detect::ArchiveFormat;
use crate::error::{GeeZipError, GeeZipResult};
/// Metadata describing a single member of an archive.
#[derive(Debug, Clone)]
pub struct Entry {
/// Path of the entry as recorded in the archive; `extract_all` joins it
/// onto the destination directory after a path-safety check.
pub path: String,
/// NOTE(review): presumably the uncompressed size in bytes — confirm against the format readers.
pub size: u64,
/// NOTE(review): presumably the stored (compressed) size in bytes — confirm against the format readers.
pub compressed_size: u64,
/// CRC-32 of the entry, when one is available.
pub crc32: Option<u32>,
/// Modification time, when one is available.
/// NOTE(review): presumably seconds since the Unix epoch — confirm against the format readers.
pub modified: Option<u64>,
/// When `true`, `extract_all` creates a directory for this entry instead of writing a file.
pub is_dir: bool,
}
/// Converts a calendar date and time of day into seconds since the Unix epoch
/// (1970-01-01 00:00:00).
///
/// The date is interpreted on the Gregorian calendar and no time-zone
/// adjustment is applied.
///
/// Returns `0` as a sentinel when the input cannot be represented:
///
/// * `month` is outside `1..=12`;
/// * `day` is `0` or larger than the length of `month` (leap years are honoured);
/// * `year` is earlier than 1970, which an unsigned timestamp cannot express;
/// * the result would overflow `u64`.
///
/// `hour`, `minute` and `second` are not range-checked; out-of-range values
/// simply carry over into later minutes, hours or days.
pub(crate) fn datetime_to_timestamp(
    year: u64,
    month: u64,
    day: u64,
    hour: u64,
    minute: u64,
    second: u64,
) -> u64 {
    const EPOCH_YEAR: u64 = 1970;
    const DAYS_IN_MONTH: [u64; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    if year < EPOCH_YEAR || !(1..=12).contains(&month) || day == 0 {
        return 0;
    }

    let leap = (year.is_multiple_of(4) && !year.is_multiple_of(100)) || year.is_multiple_of(400);
    // Length of the zero-based month `m` in `year`.
    let month_len = |m: usize| if m == 1 && leap { 29 } else { DAYS_IN_MONTH[m] };

    let month_idx = (month - 1) as usize;
    if day > month_len(month_idx) {
        return 0;
    }

    // Number of leap years strictly before `y`, counted from year 1. Taking
    // the difference of two such counts gives the leap days between two years
    // in constant time instead of looping over every year.
    let leaps_before = |y: u64| {
        let prev = y - 1;
        prev / 4 - prev / 100 + prev / 400
    };
    let leap_days = leaps_before(year) - leaps_before(EPOCH_YEAR);
    let days_into_year = (0..month_idx).map(month_len).sum::<u64>() + (day - 1);

    // Checked arithmetic: absurd inputs yield the sentinel rather than a
    // debug-build panic or a silently wrapped value.
    let seconds = || -> Option<u64> {
        let days = (year - EPOCH_YEAR)
            .checked_mul(365)?
            .checked_add(leap_days)?
            .checked_add(days_into_year)?;
        days.checked_mul(86_400)?
            .checked_add(hour.checked_mul(3_600)?)?
            .checked_add(minute.checked_mul(60)?)?
            .checked_add(second)
    };
    seconds().unwrap_or(0)
}
/// Summary produced by `ArchiveReader::extract_all` and
/// `ArchiveReader::extract_all_with_cancel`.
#[derive(Debug, Default)]
pub struct ExtractReport {
/// Number of entries materialised on disk; created directories and
/// successfully extracted files both count.
pub files_extracted: usize,
/// Sum of the byte counts returned by `ArchiveReader::extract` for the
/// files that were extracted successfully.
pub bytes_extracted: u64,
/// Number of files left untouched because they already existed and
/// overwriting was not requested.
pub files_skipped: usize,
/// Per-entry failures as `(entry path, error)`. Skipped files are also
/// listed here with a clobber-denied error.
pub errors: Vec<(String, crate::error::GeeZipError)>,
}
/// Read side of an archive format: lists entries and extracts their contents.
pub trait ArchiveReader: Send {
    /// Returns the format this reader handles.
    fn format(&self) -> ArchiveFormat;

    /// Lists the entries contained in the archive.
    fn entries(&mut self) -> GeeZipResult<Vec<Entry>>;

    /// Writes the contents of `entry` to `writer`.
    ///
    /// The returned value is added to [`ExtractReport::bytes_extracted`] by
    /// the bulk extraction methods.
    fn extract(&mut self, entry: &Entry, writer: &mut dyn Write) -> GeeZipResult<u64>;

    /// Extracts every entry beneath `dest`.
    ///
    /// Failures affecting a single entry (unsafe path, I/O error, existing
    /// file when `overwrite` is `false`, extraction error) are recorded in the
    /// returned report and extraction continues with the next entry.
    ///
    /// # Errors
    ///
    /// Returns an error only when the entry list itself cannot be read.
    fn extract_all(&mut self, dest: &Path, overwrite: bool) -> GeeZipResult<ExtractReport> {
        let entries = self.entries()?;
        let dest = normalize_path(dest);
        let mut report = ExtractReport::default();

        for entry in &entries {
            let Some(mut output) = open_entry_output(entry, &dest, overwrite, &mut report) else {
                continue;
            };
            match self.extract(entry, &mut output) {
                Ok(bytes) => {
                    report.files_extracted += 1;
                    report.bytes_extracted += bytes;
                }
                Err(e) => report.errors.push((entry.path.clone(), e)),
            }
        }
        Ok(report)
    }

    /// Same as [`ArchiveReader::extract_all`], but polls `is_cancelled` before
    /// each entry and on every write to the output file.
    ///
    /// A file that was being written when the cancellation was noticed is
    /// left on disk as-is; this method does not remove it.
    ///
    /// # Errors
    ///
    /// Returns [`GeeZipError::Cancelled`] as soon as a cancellation is
    /// observed, and any error produced while reading the entry list.
    fn extract_all_with_cancel(
        &mut self,
        dest: &Path,
        overwrite: bool,
        is_cancelled: &dyn Fn() -> bool,
    ) -> GeeZipResult<ExtractReport> {
        let entries = self.entries()?;
        let dest = normalize_path(dest);
        let mut report = ExtractReport::default();

        for entry in &entries {
            if is_cancelled() {
                return Err(GeeZipError::Cancelled);
            }
            let Some(file) = open_entry_output(entry, &dest, overwrite, &mut report) else {
                continue;
            };
            let mut output = CancellableWriter::new(file, is_cancelled);
            let outcome = self.extract(entry, &mut output);
            // Check the writer regardless of the outcome: a cancellation
            // normally surfaces as an `Err` from `extract`, and must not be
            // downgraded to an ordinary per-entry error (which would let the
            // call succeed if this was the last entry).
            if output.was_cancelled() {
                return Err(GeeZipError::Cancelled);
            }
            match outcome {
                Ok(bytes) => {
                    report.files_extracted += 1;
                    report.bytes_extracted += bytes;
                }
                Err(e) => report.errors.push((entry.path.clone(), e)),
            }
        }
        Ok(report)
    }

    /// Supplies a password for encrypted archives. The default implementation
    /// ignores it and returns `Ok(())`.
    fn set_password(&mut self, _password: &str) -> GeeZipResult<()> {
        Ok(())
    }
}

/// Prepares the on-disk target for `entry` below the already-normalised `dest`.
///
/// Returns the opened output file when the entry is a regular file that is
/// ready to be written. Returns `None` when nothing is left to do for the
/// entry: it was a directory (created here), or it was rejected, skipped or
/// failed — in which case `report` has been updated accordingly.
fn open_entry_output(
    entry: &Entry,
    dest: &Path,
    overwrite: bool,
    report: &mut ExtractReport,
) -> Option<std::fs::File> {
    let target = match check_entry_path_safety(Path::new(&entry.path), &entry.path, dest) {
        Ok(target) => target,
        Err(rejection) => {
            report.errors.push(rejection);
            return None;
        }
    };

    if entry.is_dir {
        match std::fs::create_dir_all(&target) {
            Ok(()) => report.files_extracted += 1,
            Err(e) => report.errors.push((
                entry.path.clone(),
                GeeZipError::io(e, "creating directory"),
            )),
        }
        return None;
    }

    if let Some(parent) = target.parent() {
        if !parent.exists() {
            if let Err(e) = std::fs::create_dir_all(parent) {
                report.errors.push((
                    entry.path.clone(),
                    GeeZipError::io(e, "creating parent directory"),
                ));
                return None;
            }
        }
    }

    let opened = if overwrite {
        std::fs::File::create(&target)
    } else {
        // `create_new` makes the existence check and the creation atomic.
        std::fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&target)
    };

    match opened {
        Ok(file) => Some(file),
        Err(e) if !overwrite && e.kind() == std::io::ErrorKind::AlreadyExists => {
            report.files_skipped += 1;
            report.errors.push((
                entry.path.clone(),
                GeeZipError::clobber_denied(target.display().to_string()),
            ));
            None
        }
        Err(e) => {
            report.errors.push((
                entry.path.clone(),
                GeeZipError::io(e, format!("creating output file '{}'", target.display())),
            ));
            None
        }
    }
}
/// Write side of an archive format.
pub trait ArchiveWriter: Send {
/// Returns the format this writer handles.
fn format(&self) -> ArchiveFormat;
/// Adds an entry named `path` whose contents are read from `reader`.
fn add_entry_from_reader(&mut self, path: &Path, reader: &mut dyn Read) -> GeeZipResult<()>;
/// Consumes the writer.
/// NOTE(review): presumably finalises the archive and returns the number of
/// bytes written — confirm against the implementors.
fn finish(self: Box<Self>) -> GeeZipResult<u64>;
/// Adds a directory entry. The default implementation does nothing and
/// returns `Ok(())`.
fn add_directory(&mut self, _path: &Path) -> GeeZipResult<()> {
Ok(())
}
}
/// Lexically normalises `path` without touching the file system.
///
/// * `.` components are dropped, except for a leading one.
/// * `name/..` pairs cancel out.
/// * `..` directly below the root is discarded.
/// * `..` that cannot be cancelled in a relative path is kept at the front.
/// * An empty result becomes `.`.
pub(crate) fn normalize_path(path: &Path) -> std::path::PathBuf {
    use std::ffi::OsString;
    use std::path::Component;

    let root = OsString::from(std::path::MAIN_SEPARATOR.to_string());
    let mut stack: Vec<OsString> = Vec::new();

    for part in path.components() {
        match part {
            Component::RootDir => stack.push(root.clone()),
            Component::CurDir => {
                if stack.is_empty() {
                    stack.push(OsString::from("."));
                }
            }
            Component::ParentDir => {
                // Decide first, mutate afterwards: (pop the top?, push ".."?).
                let (drop_last, add_parent) = match stack.last() {
                    None => (false, true),
                    Some(top) if *top == root => (false, false),
                    Some(top) if top.as_os_str() == "." => (true, true),
                    Some(top) if top.as_os_str() == ".." => (false, true),
                    Some(_) => (true, false),
                };
                if drop_last {
                    stack.pop();
                }
                if add_parent {
                    stack.push(OsString::from(".."));
                }
            }
            other => stack.push(other.as_os_str().to_os_string()),
        }
    }

    let mut normalized: std::path::PathBuf = stack.into_iter().collect();
    if normalized.as_os_str().is_empty() {
        normalized.push(".");
    }
    normalized
}
/// Resolves an archive entry to its on-disk location beneath `dest`, refusing
/// entries that would be written outside of it.
///
/// `entry_path` is the entry's path, `entry_name` the same path as text (used
/// for error reporting and the Windows UNC check), and `dest` the extraction
/// directory. `dest` does not need to be normalised by the caller.
///
/// An entry is rejected when it
///
/// * is rooted (absolute),
/// * on Windows, starts with `\\` or `//`,
/// * climbs above its own starting point once normalised (e.g. `../x` or
///   `a/../../x`) — this holds for every `dest`, including an empty one or one
///   that itself consists of `..` components, and
/// * otherwise resolves to a location that is not beneath `dest`.
///
/// # Errors
///
/// Returns `(entry_name, GeeZipError::PathTraversal { .. })` for a rejected
/// entry.
pub fn check_entry_path_safety(
    entry_path: &Path,
    entry_name: &str,
    dest: &Path,
) -> std::result::Result<std::path::PathBuf, (String, GeeZipError)> {
    let reject = || {
        (
            entry_name.to_owned(),
            GeeZipError::PathTraversal {
                entry: entry_name.to_owned(),
                target: dest.display().to_string(),
            },
        )
    };

    if entry_path.has_root() {
        return Err(reject());
    }

    #[cfg(windows)]
    {
        let path_os = entry_name.replace("/", "\\");
        if path_os.starts_with("\\\\") {
            return Err(reject());
        }
    }

    // The prefix comparison below cannot catch an escape when `dest` is empty
    // or is itself made of `..` components, so refuse such entries up front.
    let climbs_out = matches!(
        normalize_path(entry_path).components().next(),
        Some(std::path::Component::ParentDir)
    );
    if climbs_out {
        return Err(reject());
    }

    // Compare against the normalised destination so that callers passing
    // something like `out/../dir` or `dir/.` get a meaningful prefix check.
    let base = normalize_path(dest);
    let target = normalize_path(&base.join(entry_path));
    if !target.starts_with(&base) {
        return Err(reject());
    }
    Ok(target)
}
/// Reports whether `path` is unsafe to use as a relative extraction target.
///
/// A path is considered dangerous when it is rooted, when (on Windows) it
/// starts with `\\` or `//`, or when its normalised form begins with `..`.
pub fn is_entry_path_dangerous(path: &Path) -> bool {
    use std::path::Component;

    if path.has_root() {
        return true;
    }

    #[cfg(windows)]
    {
        if path
            .to_string_lossy()
            .replace("/", "\\")
            .starts_with("\\\\")
        {
            return true;
        }
    }

    let leading = normalize_path(path).components().next();
    leading == Some(Component::ParentDir)
}
/// Writer adapter that keeps a running total of the bytes accepted by the
/// wrapped writer.
pub(crate) struct CountWriter<W> {
    /// The wrapped writer.
    pub(crate) inner: W,
    /// Total number of bytes the wrapped writer has reported as written.
    pub(crate) count: u64,
}

impl<W: std::io::Write> std::io::Write for CountWriter<W> {
    /// Forwards to the wrapped writer and adds the accepted byte count to the
    /// total; a failed write leaves the total unchanged.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.inner.write(buf).map(|accepted| {
            self.count += accepted as u64;
            accepted
        })
    }

    /// Flushes the wrapped writer.
    fn flush(&mut self) -> std::io::Result<()> {
        self.inner.flush()
    }
}
/// Writer adapter that aborts with an error once a cancellation callback
/// reports `true`.
///
/// The callback is polled before every `write`, `write_all` chunk and `flush`.
/// A cancellation is reported as an [`std::io::ErrorKind::Interrupted`] error
/// and latched, so it can be queried afterwards via
/// [`CancellableWriter::was_cancelled`].
///
/// NOTE(review): code that calls `write` directly and retries on
/// `Interrupted` will keep retrying while the callback returns `true`.
/// `write_all` is overridden below so that the standard helpers built on it
/// do not.
pub(crate) struct CancellableWriter<'a, W> {
    inner: W,
    is_cancelled: &'a dyn Fn() -> bool,
    cancelled: bool,
}

impl<'a, W> CancellableWriter<'a, W> {
    /// Wraps `inner`; `is_cancelled` is consulted before each operation.
    pub(crate) fn new(inner: W, is_cancelled: &'a dyn Fn() -> bool) -> Self {
        CancellableWriter {
            inner,
            is_cancelled,
            cancelled: false,
        }
    }

    /// Returns `true` once any operation has been refused due to cancellation.
    pub(crate) fn was_cancelled(&self) -> bool {
        self.cancelled
    }

    /// Polls the callback; on cancellation, latches the flag and returns the
    /// error to hand back to the caller.
    fn check_cancelled(&mut self) -> std::io::Result<()> {
        if (self.is_cancelled)() {
            self.cancelled = true;
            return Err(std::io::Error::new(
                std::io::ErrorKind::Interrupted,
                "operation cancelled by user",
            ));
        }
        Ok(())
    }
}

impl<W: std::io::Write> std::io::Write for CancellableWriter<'_, W> {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        self.check_cancelled()?;
        self.inner.write(buf)
    }

    /// Writes the whole buffer, stopping immediately on cancellation.
    ///
    /// The default `write_all` retries every `Interrupted` error, which would
    /// spin forever on a cancellation. This version only retries
    /// interruptions coming from the wrapped writer and re-polls the callback
    /// before each attempt.
    fn write_all(&mut self, mut buf: &[u8]) -> std::io::Result<()> {
        while !buf.is_empty() {
            self.check_cancelled()?;
            match self.inner.write(buf) {
                Ok(0) => {
                    return Err(std::io::Error::new(
                        std::io::ErrorKind::WriteZero,
                        "failed to write whole buffer",
                    ));
                }
                Ok(n) => buf = &buf[n..],
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(())
    }

    fn flush(&mut self) -> std::io::Result<()> {
        self.check_cancelled()?;
        self.inner.flush()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test double whose `extract` writes the same fixed payload for every entry.
    struct MockReader {
        entries: Vec<Entry>,
        content: &'static [u8],
    }

    impl ArchiveReader for MockReader {
        fn format(&self) -> ArchiveFormat {
            ArchiveFormat::Tar
        }

        fn entries(&mut self) -> GeeZipResult<Vec<Entry>> {
            Ok(self.entries.clone())
        }

        fn extract(&mut self, _entry: &Entry, writer: &mut dyn Write) -> GeeZipResult<u64> {
            writer.write_all(self.content)?;
            Ok(self.content.len() as u64)
        }
    }

    /// Builds an entry with only the fields the extraction tests care about.
    fn entry(path: &str, size: u64, is_dir: bool) -> Entry {
        Entry {
            path: path.into(),
            size,
            compressed_size: 0,
            crc32: None,
            modified: None,
            is_dir,
        }
    }

    // --- normalize_path ---------------------------------------------------

    #[test]
    fn normalize_path_simple() {
        let p = normalize_path(Path::new("/a/b/c"));
        assert_eq!(p, Path::new("/a/b/c"));
    }

    #[test]
    fn normalize_path_with_dotdot() {
        let p = normalize_path(Path::new("/a/b/../c"));
        assert_eq!(p, Path::new("/a/c"));
    }

    #[test]
    fn normalize_path_with_curdir() {
        let p = normalize_path(Path::new("/a/./b"));
        assert_eq!(p, Path::new("/a/b"));
    }

    #[test]
    fn normalize_path_below_root_escape() {
        let p = normalize_path(Path::new("/a/../../c"));
        assert_eq!(p, Path::new("/c"));
    }

    #[test]
    fn normalize_path_preserves_leading_curdir() {
        assert_eq!(normalize_path(Path::new(".")), Path::new("."));
        assert_eq!(normalize_path(Path::new("./foo")), Path::new("./foo"));
        assert_eq!(normalize_path(Path::new("./a/b")), Path::new("./a/b"));
    }

    #[test]
    fn normalize_path_with_leading_dotdot() {
        assert_eq!(normalize_path(Path::new("../foo")), Path::new("../foo"));
        assert_eq!(normalize_path(Path::new("./../foo")), Path::new("../foo"));
        assert_eq!(normalize_path(Path::new("./..")), Path::new(".."));
        assert_eq!(normalize_path(Path::new("./a/../../b")), Path::new("../b"));
    }

    #[test]
    fn normalize_path_multiple_dotdot() {
        assert_eq!(
            normalize_path(Path::new("../../foo")),
            Path::new("../../foo")
        );
        assert_eq!(
            normalize_path(Path::new("./../../foo")),
            Path::new("../../foo")
        );
        assert_eq!(normalize_path(Path::new("a/../..")), Path::new(".."));
    }

    #[test]
    fn normalize_path_complex_traversal() {
        assert_eq!(
            normalize_path(Path::new("a/b/../../../c")),
            Path::new("../c")
        );
        assert_eq!(normalize_path(Path::new("a/./../../b")), Path::new("../b"));
        assert_eq!(
            normalize_path(Path::new("a/./../b/.././c/../d")),
            Path::new("d")
        );
    }

    #[test]
    fn normalize_path_edge_cases() {
        assert_eq!(normalize_path(Path::new("")), Path::new("."));
        assert_eq!(normalize_path(Path::new("/")), Path::new("/"));
        assert_eq!(normalize_path(Path::new("foo/.")), Path::new("foo"));
        assert_eq!(normalize_path(Path::new("a//b")), Path::new("a/b"));
        assert!(!normalize_path(Path::new("a//b")).as_os_str().is_empty());
    }

    // --- check_entry_path_safety -------------------------------------------

    #[test]
    #[cfg(not(windows))]
    fn check_entry_path_safety_rejects_absolute() {
        let dest = Path::new("/tmp/out");
        let result = check_entry_path_safety(Path::new("/etc/passwd"), "/etc/passwd", dest);
        assert!(result.is_err());
        let (name, err) = result.unwrap_err();
        assert_eq!(name, "/etc/passwd");
        assert!(matches!(err, GeeZipError::PathTraversal { .. }));
    }

    #[test]
    fn check_entry_path_safety_rejects_traversal() {
        let dest = Path::new("/tmp/out");
        let result = check_entry_path_safety(Path::new("../etc/passwd"), "../etc/passwd", dest);
        assert!(result.is_err());
        let (name, err) = result.unwrap_err();
        assert_eq!(name, "../etc/passwd");
        assert!(matches!(err, GeeZipError::PathTraversal { .. }));
    }

    #[test]
    fn check_entry_path_safety_accepts_normal() {
        let dest = Path::new("/tmp/out");
        let result = check_entry_path_safety(Path::new("file.txt"), "file.txt", dest);
        assert!(result.is_ok());
        let target = result.unwrap();
        assert_eq!(target, Path::new("/tmp/out/file.txt"));
    }

    // --- datetime_to_timestamp ---------------------------------------------

    #[test]
    fn datetime_to_timestamp_rejects_invalid_month_day() {
        assert_eq!(datetime_to_timestamp(2026, 0, 15, 0, 0, 0), 0);
        assert_eq!(datetime_to_timestamp(2026, 13, 15, 0, 0, 0), 0);
        assert_eq!(datetime_to_timestamp(2026, 6, 0, 0, 0, 0), 0);
        assert_eq!(datetime_to_timestamp(2026, 6, 31, 0, 0, 0), 0);
        assert!(datetime_to_timestamp(2026, 6, 15, 0, 0, 0) > 0);
    }

    #[test]
    fn datetime_to_timestamp_leap_year() {
        let ts = datetime_to_timestamp(2024, 2, 29, 12, 0, 0);
        assert!(ts > 0, "leap year Feb 29 should produce a valid timestamp");
        assert_eq!(datetime_to_timestamp(2023, 2, 29, 0, 0, 0), 0);
    }

    #[test]
    fn datetime_to_timestamp_valid_date_range() {
        let ts = datetime_to_timestamp(2026, 6, 2, 0, 0, 0);
        assert!(ts > 0);
        assert!(ts > 1700000000, "2026-06-02 should be well past epoch");
    }

    // --- writers -----------------------------------------------------------

    #[test]
    fn count_writer_tracks_bytes() {
        let inner = Vec::new();
        let mut writer = CountWriter { inner, count: 0 };
        let n = writer.write(b"hello").unwrap();
        assert_eq!(n, 5);
        assert_eq!(writer.count, 5);
        let n = writer.write(b" world").unwrap();
        assert_eq!(n, 6);
        assert_eq!(writer.count, 11);
        writer.flush().unwrap();
        assert_eq!(&writer.inner, b"hello world");
    }

    #[test]
    fn cancellable_writer_detects_cancellation() {
        let mut buf = Vec::new();
        let cancelled = true;
        let is_cancelled = || cancelled;
        let mut writer = CancellableWriter::new(&mut buf, &is_cancelled);
        let result = writer.write(b"hello");
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::Interrupted);
        assert!(writer.was_cancelled());
        assert!(buf.is_empty());
    }

    #[test]
    fn cancellable_writer_passes_through_when_not_cancelled() {
        let mut buf = Vec::new();
        let cancelled = false;
        let is_cancelled = || cancelled;
        let mut writer = CancellableWriter::new(&mut buf, &is_cancelled);
        let n = writer.write(b"hello").unwrap();
        assert_eq!(n, 5);
        assert!(!writer.was_cancelled());
        assert_eq!(&buf, b"hello");
    }

    #[test]
    fn cancellable_writer_flush_detects_cancellation() {
        use std::cell::Cell;
        let cancelled = Cell::new(false);
        let is_cancelled = || cancelled.get();
        let mut buf = Vec::new();
        let mut writer = CancellableWriter::new(&mut buf, &is_cancelled);
        writer.write_all(b"data").unwrap();
        cancelled.set(true);
        let result = writer.flush();
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), std::io::ErrorKind::Interrupted);
        assert!(writer.was_cancelled());
        assert_eq!(&buf, b"data");
    }

    // --- extract_all / extract_all_with_cancel -------------------------------

    #[test]
    fn extract_all_creates_directories() {
        let mut reader = MockReader {
            entries: vec![
                entry("emptydir", 0, true),
                entry("emptydir/file.txt", 12, false),
            ],
            content: b"file content",
        };
        let tmp = tempfile::tempdir().unwrap();
        let report = reader.extract_all(tmp.path(), true).unwrap();
        assert!(
            tmp.path().join("emptydir").is_dir(),
            "directory entry should create a directory on disk"
        );
        assert!(
            tmp.path().join("emptydir/file.txt").is_file(),
            "file entry should be extracted"
        );
        assert_eq!(report.files_extracted, 2);
        assert_eq!(report.bytes_extracted, 12);
        assert!(report.errors.is_empty(), "extract_all errors: {report:?}");
    }

    #[test]
    fn extract_all_skips_existing_on_no_clobber() {
        let entries = vec![entry("existing.txt", 17, false)];
        let tmp = tempfile::tempdir().unwrap();
        let dest = tmp.path().to_path_buf();

        let mut reader = MockReader {
            entries: entries.clone(),
            content: b"mock file content",
        };
        let report1 = reader.extract_all(&dest, true).unwrap();
        assert_eq!(
            report1.files_extracted, 1,
            "should extract the file on first run"
        );
        assert_eq!(report1.files_skipped, 0);
        assert!(report1.errors.is_empty(), "errors: {report1:?}");
        let content = std::fs::read_to_string(dest.join("existing.txt")).unwrap();
        assert_eq!(content, "mock file content");

        let mut reader2 = MockReader {
            entries,
            content: b"mock file content",
        };
        let report2 = reader2.extract_all(&dest, false).unwrap();
        assert_eq!(report2.files_extracted, 0);
        assert_eq!(report2.files_skipped, 1);
        assert_eq!(
            report2.errors.len(),
            1,
            "should have one ClobberDenied error"
        );
        assert!(
            matches!(report2.errors[0].1, GeeZipError::ClobberDenied { .. }),
            "error should be ClobberDenied"
        );
        let content2 = std::fs::read_to_string(dest.join("existing.txt")).unwrap();
        assert_eq!(
            content2, "mock file content",
            "file should not be overwritten"
        );
    }

    #[test]
    fn extract_all_with_cancel_extracts_when_not_cancelled() {
        let mut reader = MockReader {
            entries: vec![entry("a.txt", 12, false)],
            content: b"file content",
        };
        let tmp = tempfile::tempdir().unwrap();
        let report = reader
            .extract_all_with_cancel(tmp.path(), true, &|| false)
            .unwrap();
        assert_eq!(report.files_extracted, 1);
        assert_eq!(report.bytes_extracted, 12);
        assert!(report.errors.is_empty(), "errors: {report:?}");
        let content = std::fs::read_to_string(tmp.path().join("a.txt")).unwrap();
        assert_eq!(content, "file content");
    }

    #[test]
    fn extract_all_with_cancel_stops_before_first_entry() {
        let mut reader = MockReader {
            entries: vec![entry("a.txt", 12, false)],
            content: b"file content",
        };
        let tmp = tempfile::tempdir().unwrap();
        let result = reader.extract_all_with_cancel(tmp.path(), true, &|| true);
        assert!(matches!(result, Err(GeeZipError::Cancelled)));
        assert!(
            !tmp.path().join("a.txt").exists(),
            "nothing should be written once cancelled"
        );
    }
}