use std::{
io::{self, BufReader, Read},
ops::ControlFlow,
path::{Path, PathBuf},
};
use fs_err::{self as fs, PathExt};
use crate::{
BUFFER_CAPACITY, INITIAL_CURRENT_DIR, QuestionAction, QuestionPolicy, Result,
commands::{warn_user_about_loading_sevenz_in_memory, warn_user_about_loading_zip_in_memory},
extension::{
CompressionFormat::{self, *},
Extension, split_first_compression_format,
},
info, info_accessible,
non_archive::lz4::MultiFrameLz4Decoder,
utils::{
self, BytesFmt, PathFmt, file_size,
io::{ReadSeek, lock_and_flush_output_stdio},
is_path_stdin, resolve_path_conflict, user_wants_to_continue,
},
};
/// Everything `decompress_file` needs to process a single input.
pub struct DecompressOptions<'a> {
/// Path of the input to decompress; checked with `is_path_stdin` to detect STDIN input.
pub input_file_path: &'a Path,
/// Extensions detected for the input; split into the first compression format and the
/// remaining ones by `split_first_compression_format`.
pub formats: Vec<Extension>,
/// Output directory. `decompress_file` asserts that it exists. Archives are unpacked
/// directly into it when `output_dir_was_explicit` or `here` is set.
pub output_dir: &'a Path,
/// Destination file for non-archive formats; also used as the directory archives are
/// unpacked into when neither `output_dir_was_explicit` nor `here` is set.
pub output_file_path: PathBuf,
/// When `true`, archives go straight into `output_dir` and the duplicate-basename
/// flattening step is skipped.
pub output_dir_was_explicit: bool,
/// Has the same effect as `output_dir_was_explicit` inside `decompress_file`.
pub here: bool,
/// Policy forwarded to every helper that may need to ask the user a question.
pub question_policy: QuestionPolicy,
/// Optional password forwarded to the zip, 7z and rar unpackers.
pub password: Option<&'a [u8]>,
/// When `true` (and the input is not STDIN), the input file is removed after a
/// successful decompression.
pub remove: bool,
}
/// Outcome of a successful decompression, used to print the final report.
enum DecompressionSummary {
/// An archive was unpacked: `files_unpacked` is the count returned by the unpack
/// function and `output_path` is the directory that was actually written to.
Archive { files_unpacked: u64, output_path: PathBuf },
/// A single compressed stream was decoded into the file at `output_path`.
NonArchive { output_path: PathBuf },
}
/// Decompresses or unpacks the input described by `options`.
///
/// The first format returned by `split_first_compression_format` selects the strategy:
/// stream formats are decoded into a single output file, archive formats (tar, zip, 7z,
/// rar) are unpacked into a directory. The remaining formats are applied beforehand as
/// chained streaming decoders, iterated in reverse order.
///
/// After a successful run a summary is printed and, when `options.remove` is set and the
/// input is not STDIN, the input file is deleted.
///
/// Returns `Ok(())` on success, and also when one of the prompting helpers signals that
/// the operation should not proceed (presumably because the user declined).
///
/// # Errors
///
/// Propagates I/O and decoder errors, and returns a "no support" error for bzip3 / rar
/// when the corresponding cargo feature is disabled.
///
/// # Panics
///
/// Panics if `options.output_dir` does not exist.
pub fn decompress_file(options: DecompressOptions) -> Result<()> {
assert!(options.output_dir.fs_err_try_exists()?);
let input_is_stdin = is_path_stdin(options.input_file_path);
let (first_extension, extensions) = split_first_compression_format(&options.formats);
// Wraps `decoder` in the streaming decoder that matches `format`.
// Archive formats must never reach this closure (see the `unreachable!()` arm).
let chain_reader_decoder = |format: &CompressionFormat, decoder: Box<dyn Read>| -> Result<Box<dyn Read>> {
let decoder: Box<dyn Read> = match format {
Gzip => Box::new(flate2::read::MultiGzDecoder::new(decoder)),
Bzip => Box::new(bzip2::read::MultiBzDecoder::new(decoder)),
Bzip3 => {
#[cfg(not(feature = "bzip3"))]
return Err(crate::Error::bzip3_no_support());
#[cfg(feature = "bzip3")]
Box::new(bzip3::read::Bz3Decoder::new(decoder)?)
}
Lz4 => Box::new(MultiFrameLz4Decoder::new(decoder)),
Lzma => Box::new(lzma_rust2::LzmaReader::new_mem_limit(decoder, u32::MAX, None)?),
Xz => Box::new(lzma_rust2::XzReader::new(decoder, true)),
Lzip => Box::new(lzma_rust2::LzipReader::new(decoder)),
Snappy => Box::new(snap::read::FrameDecoder::new(decoder)),
Zstd => Box::new(zstd::stream::Decoder::new(decoder)?),
Brotli => Box::new(brotli::Decompressor::new(decoder, BUFFER_CAPACITY)),
Tar | Zip | Rar | SevenZip => unreachable!(),
};
Ok(decoder)
};
// Opens the raw input (STDIN or a buffered file) and stacks a decoder for every format
// in `extensions`, i.e. everything except the first format, which each match arm below
// handles itself.
let create_decoder_up_to_first_extension = || -> Result<Box<dyn Read>> {
let mut reader: Box<dyn Read> = if input_is_stdin {
Box::new(io::stdin())
} else {
Box::new(BufReader::with_capacity(
BUFFER_CAPACITY,
fs::File::open(options.input_file_path)?,
))
};
// Reverse iteration: the last format in the list is the first decoder applied to the
// raw bytes.
for format in extensions.iter().rev() {
reader = chain_reader_decoder(format, reader)?;
}
Ok(reader)
};
// Archives go straight into `output_dir` when it was given explicitly (or `here` is set);
// otherwise they get their own directory at `output_file_path`.
let archive_output_dir: &Path = if options.output_dir_was_explicit || options.here {
options.output_dir
} else {
&options.output_file_path
};
let control_flow = match first_extension {
// Plain compressed stream: decode everything into a single output file.
Gzip | Bzip | Bzip3 | Lz4 | Lzma | Xz | Lzip | Snappy | Zstd | Brotli => {
let reader = create_decoder_up_to_first_extension()?;
let mut reader = chain_reader_decoder(&first_extension, reader)?;
// `None` means no output file was created, so there is nothing left to do.
let (mut writer, final_output_path) = match utils::create_file_or_prompt_on_conflict(
&options.output_file_path,
options.question_policy,
QuestionAction::Decompression,
)? {
Some(file) => file,
None => return Ok(()),
};
io::copy(&mut reader, &mut writer)?;
ControlFlow::Continue(DecompressionSummary::NonArchive {
output_path: final_output_path,
})
}
// Tar is unpacked directly from the (possibly decoded) stream.
Tar => unpack_archive(
|output_dir| crate::archive::tar::unpack_archive(create_decoder_up_to_first_extension()?, output_dir),
archive_output_dir,
options.question_policy,
)?,
// Zip and 7z unpackers take a `ReadSeek` reader.
Zip | SevenZip => {
let unpack_fn = match first_extension {
Zip => crate::archive::zip::unpack_archive,
SevenZip => crate::archive::sevenz::unpack_archive,
_ => unreachable!(),
};
// STDIN input, or an archive wrapped in further compression, is buffered fully in
// memory (presumably because such a stream cannot be seeked).
let should_load_everything_into_memory = input_is_stdin || !extensions.is_empty();
let reader: Box<dyn ReadSeek> = if should_load_everything_into_memory {
let memory_warning_fn = match first_extension {
Zip => warn_user_about_loading_zip_in_memory,
SevenZip => warn_user_about_loading_sevenz_in_memory,
_ => unreachable!(),
};
// The stdio locks are held while the warning and the confirmation prompt are shown.
let locks = lock_and_flush_output_stdio();
memory_warning_fn();
if !user_wants_to_continue(
options.input_file_path,
options.question_policy,
QuestionAction::Decompression,
)? {
return Ok(());
}
drop(locks);
let mut vec = vec![];
io::copy(&mut create_decoder_up_to_first_extension()?, &mut vec)?;
Box::new(io::Cursor::new(vec))
} else {
Box::new(BufReader::with_capacity(
BUFFER_CAPACITY,
fs::File::open(options.input_file_path)?,
))
};
unpack_archive(
|output_dir| unpack_fn(reader, output_dir, options.password),
archive_output_dir,
options.question_policy,
)?
}
// The rar unpacker takes a path rather than a reader.
#[cfg(feature = "unrar")]
Rar => {
let unpack_fn: Box<dyn FnOnce(&Path) -> Result<u64>> = if options.formats.len() > 1 || input_is_stdin {
// Wrapped or piped input is first decoded into a temporary file.
let mut temp_file = tempfile::NamedTempFile::new()?;
io::copy(&mut create_decoder_up_to_first_extension()?, &mut temp_file)?;
// `move` transfers ownership of `temp_file` into the closure, so it stays alive
// until the closure has run.
Box::new(move |output_dir| {
crate::archive::rar::unpack_archive(temp_file.path(), output_dir, options.password)
})
} else {
Box::new(|output_dir| {
crate::archive::rar::unpack_archive(options.input_file_path, output_dir, options.password)
})
};
unpack_archive(unpack_fn, archive_output_dir, options.question_policy)?
}
#[cfg(not(feature = "unrar"))]
Rar => {
return Err(crate::Error::rar_no_support());
}
};
// `Break` is returned by `unpack_archive` when no output directory could be settled on.
let ControlFlow::Continue(decompression_summary) = control_flow else {
return Ok(());
};
match decompression_summary {
DecompressionSummary::Archive {
files_unpacked,
output_path,
} => {
// Only a wrapper directory chosen by this function is flattened; an explicitly
// chosen output directory is left untouched.
if !options.output_dir_was_explicit && !options.here {
deduplicate_basename_wrapper(&output_path)?;
}
info_accessible!("Successfully decompressed archive to {}", PathFmt(&output_path));
info_accessible!("Files unpacked: {files_unpacked}");
}
DecompressionSummary::NonArchive { output_path } => {
if input_is_stdin {
info_accessible!("STDIN decompressed to {}", PathFmt(&output_path));
} else {
info_accessible!(
"File {} decompressed to {}",
PathFmt(options.input_file_path),
PathFmt(&output_path),
);
info_accessible!("Input file size: {}", BytesFmt(file_size(options.input_file_path)?));
}
info_accessible!("Output file size: {}", BytesFmt(file_size(&output_path)?));
}
}
// The input is removed only after everything above succeeded, and never for STDIN.
if !input_is_stdin && options.remove {
fs::remove_file(options.input_file_path)?;
info!("Removed input file {}", PathFmt(options.input_file_path));
}
Ok(())
}
/// Settles on an output directory, creates it if needed, and runs `unpack_fn` on it.
///
/// `output_dir` is used unchanged when it is the initial current directory, does not
/// exist yet, or is an empty directory. Otherwise `resolve_path_conflict` is consulted
/// for a replacement path.
///
/// Returns `ControlFlow::Break(())` when `resolve_path_conflict` yields no path, and
/// `ControlFlow::Continue(DecompressionSummary::Archive { .. })` with the number of
/// unpacked files and the directory actually used once unpacking succeeded.
///
/// # Errors
///
/// Propagates errors from the filesystem checks, from `resolve_path_conflict`, from
/// creating the directory, and from `unpack_fn` itself.
fn unpack_archive(
    unpack_fn: impl FnOnce(&Path) -> Result<u64>,
    output_dir: &Path,
    question_policy: QuestionPolicy,
) -> Result<ControlFlow<(), DecompressionSummary>> {
    // Evaluated left to right with short-circuiting: the filesystem is only consulted
    // when the cheaper checks before it did not already accept the directory.
    let usable_as_is = output_dir == *INITIAL_CURRENT_DIR
        || !output_dir.fs_err_try_exists()?
        || (output_dir.is_dir() && output_dir.read_dir()?.next().is_none());

    let target_dir = if usable_as_is {
        output_dir.to_path_buf()
    } else {
        let Some(replacement) = resolve_path_conflict(output_dir, question_policy, QuestionAction::Decompression)?
        else {
            return Ok(ControlFlow::Break(()));
        };
        replacement
    };

    if !target_dir.fs_err_try_exists()? {
        fs::create_dir(&target_dir)?;
    }

    unpack_fn(&target_dir).map(|files_unpacked| {
        ControlFlow::Continue(DecompressionSummary::Archive {
            files_unpacked,
            output_path: target_dir,
        })
    })
}
/// Flattens a redundant `name/name` nesting inside `wrapper`.
///
/// If `wrapper` contains exactly one entry and that entry has the same file name as
/// `wrapper` itself, the entry takes the wrapper's place: `archive/archive/*` becomes
/// `archive/*`, and a lone file `archive/archive` becomes the file `archive`. Only this
/// outermost level is flattened. In every other case (no file name, no parent, empty
/// wrapper, several entries, differing name) nothing is changed.
///
/// The swap goes through a staging directory created next to `wrapper`. Its name starts
/// as `ouch-temporary`; if that name is already taken (by an unrelated sibling or by the
/// wrapper itself) numbered variants `ouch-temporary-1`, `ouch-temporary-2`, ... are
/// tried until one can be created, so a pre-existing entry is never touched and never
/// causes a failure.
///
/// # Errors
///
/// Propagates I/O errors from reading `wrapper` and from the create / rename / remove
/// steps. When one of the first two steps of the swap fails, the changes made so far
/// are rolled back on a best-effort basis before the error is returned.
fn deduplicate_basename_wrapper(wrapper: &Path) -> Result<()> {
    let Some(wrapper_name) = wrapper.file_name() else {
        return Ok(());
    };

    let sole_entry = {
        let mut entries = fs::read_dir(wrapper)?;
        let Some(first_entry) = entries.next().transpose()? else {
            return Ok(());
        };
        // A second entry means the wrapper is doing real work; keep it.
        if entries.next().transpose()?.is_some() {
            return Ok(());
        }
        first_entry
    };

    if sole_entry.file_name() != wrapper_name {
        return Ok(());
    }
    let inner_path = sole_entry.path();

    let Some(parent) = wrapper.parent() else {
        return Ok(());
    };

    // Claim a staging directory by creating it: `create_dir` fails with `AlreadyExists`
    // when the name is taken, so each attempt is an atomic check-and-claim.
    let staging_dir = {
        let mut attempt: u32 = 0;
        loop {
            let candidate = if attempt == 0 {
                parent.join("ouch-temporary")
            } else {
                parent.join(format!("ouch-temporary-{attempt}"))
            };
            match fs::create_dir(&candidate) {
                Ok(()) => break candidate,
                Err(err) if err.kind() == io::ErrorKind::AlreadyExists => attempt += 1,
                Err(err) => return Err(err.into()),
            }
        }
    };
    let staged_path = staging_dir.join(wrapper_name);

    // Step 1: move the inner entry out of the wrapper.
    if let Err(err) = fs::rename(&inner_path, &staged_path) {
        // Best effort: do not leave the still-empty staging directory behind.
        let _ = fs::remove_dir(&staging_dir);
        return Err(err.into());
    }

    // Step 2: remove the now-empty wrapper.
    if let Err(err) = fs::remove_dir(wrapper) {
        // Best effort: put the entry back where it was and drop the staging directory.
        let _ = fs::rename(&staged_path, &inner_path);
        let _ = fs::remove_dir(&staging_dir);
        return Err(err.into());
    }

    // Step 3: let the staged entry take the wrapper's path, then clean up.
    fs::rename(&staged_path, wrapper)?;
    fs::remove_dir(&staging_dir)?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use std::fs as std_fs;

    use tempfile::tempdir;

    use super::*;

    /// Lists every path below `root`, relative to it and sorted.
    ///
    /// Directories carry a trailing `/`; backslashes are normalised to `/` so the
    /// expectations read the same on every platform. Unreadable directories and entries
    /// are skipped.
    fn list_tree(root: &Path) -> Vec<String> {
        let mut listing = Vec::new();
        // Explicit work list instead of recursion; the final sort makes the visiting
        // order irrelevant.
        let mut pending = vec![root.to_path_buf()];
        while let Some(dir) = pending.pop() {
            let Ok(entries) = std_fs::read_dir(&dir) else {
                continue;
            };
            for path in entries.flatten().map(|entry| entry.path()) {
                let relative = path.strip_prefix(root).unwrap().to_string_lossy().replace('\\', "/");
                if path.is_dir() {
                    listing.push(format!("{relative}/"));
                    pending.push(path);
                } else {
                    listing.push(relative);
                }
            }
        }
        listing.sort();
        listing
    }

    /// `archive/archive/{a,b}.txt` collapses to `archive/{a,b}.txt`.
    #[test]
    fn deduplicate_flattens_when_inner_dir_matches_wrapper_name() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("archive");
        let duplicate = wrapper.join("archive");
        std_fs::create_dir_all(&duplicate).unwrap();
        for (name, contents) in [("a.txt", "a"), ("b.txt", "b")] {
            std_fs::write(duplicate.join(name), contents).unwrap();
        }

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert_eq!(list_tree(&wrapper), ["a.txt", "b.txt"]);
    }

    /// A single child with a different name is left where it is.
    #[test]
    fn deduplicate_keeps_wrapper_when_inner_name_differs() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("archive");
        let child = wrapper.join("mytool");
        std_fs::create_dir_all(&child).unwrap();
        std_fs::write(child.join("file.txt"), "x").unwrap();

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert_eq!(list_tree(&wrapper), ["mytool/", "mytool/file.txt"]);
    }

    /// More than one entry in the wrapper means nothing is flattened.
    #[test]
    fn deduplicate_keeps_wrapper_when_multiple_entries() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("archive");
        std_fs::create_dir_all(&wrapper).unwrap();
        for (name, contents) in [("a.txt", "a"), ("b.txt", "b")] {
            std_fs::write(wrapper.join(name), contents).unwrap();
        }

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert_eq!(list_tree(&wrapper), ["a.txt", "b.txt"]);
    }

    /// An empty wrapper survives untouched.
    #[test]
    fn deduplicate_is_noop_on_empty_wrapper() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("archive");
        std_fs::create_dir(&wrapper).unwrap();

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert!(wrapper.is_dir());
        assert!(list_tree(&wrapper).is_empty());
    }

    /// A lone file named like the wrapper replaces the wrapper directory.
    #[test]
    fn deduplicate_promotes_single_inner_file_with_matching_name() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("archive");
        std_fs::create_dir(&wrapper).unwrap();
        std_fs::write(wrapper.join("archive"), "data").unwrap();

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert!(wrapper.is_file(), "wrapper should now be a file, not a directory");
        assert_eq!(std_fs::read(&wrapper).unwrap(), b"data");
    }

    /// Only the outermost duplicate level is removed; deeper ones stay.
    #[test]
    fn deduplicate_only_flattens_outer_wrapper_not_inner_duplicates() {
        let scratch = tempdir().unwrap();
        let wrapper = scratch.path().join("testing");
        let deepest = wrapper.join("testing").join("testing");
        std_fs::create_dir_all(&deepest).unwrap();
        std_fs::write(deepest.join("file"), "deep").unwrap();

        deduplicate_basename_wrapper(&wrapper).unwrap();

        assert_eq!(list_tree(&wrapper), ["testing/", "testing/file"]);
    }
}