use crate::error::{GdownError, Result};
use flate2::read::GzDecoder;
use std::fs::File;
use std::path::{Path, PathBuf};
use tar::Archive as TarArchive;
use zip::ZipArchive;
/// File extensions that `is_archive` treats as extractable archives.
///
/// `is_archive` compares these against `Path::extension()`, which only yields
/// the final dot-separated segment; the multi-part `tar.gz` / `tar.bz2` forms
/// are recognised there through a separate check on the file stem.
const SUPPORTED_EXTENSIONS: &[&str] = &[
    "zip",
    "tar",
    "tar.gz",
    "tgz",
    "tar.bz2",
    "tbz",
];
/// Reports whether `member_path`, interpreted relative to `destination`,
/// stays inside `destination`.
///
/// `member_path` may be a relative archive member name or a path that has
/// already been joined onto `destination`; an absolute path is taken as-is.
/// The check is purely lexical: `.` and `..` are resolved without touching the
/// filesystem, so symlinks are not followed.
///
/// Returns `false` when the resolved path climbs above `destination` (for
/// example `../outside.txt`) or is an absolute path located elsewhere.
fn is_safe_path(destination: &Path, member_path: &Path) -> bool {
    use std::path::Component;

    // Lexically resolves `.` and `..`. Leading `..` of a relative path are
    // kept (there is nothing to cancel them against) and `..` at the root is
    // a no-op, mirroring how the OS treats `/..`.
    fn normalize(path: &Path) -> PathBuf {
        let mut resolved = PathBuf::new();
        for component in path.components() {
            match component {
                Component::CurDir => {}
                Component::ParentDir => {
                    let last = resolved.components().next_back();
                    let ends_with_name = matches!(last, Some(Component::Normal(_)));
                    let at_root = matches!(
                        last,
                        Some(Component::RootDir) | Some(Component::Prefix(_))
                    );
                    if ends_with_name {
                        resolved.pop();
                    } else if !at_root {
                        resolved.push("..");
                    }
                }
                other => resolved.push(other.as_os_str()),
            }
        }
        resolved
    }

    let root = normalize(destination);
    // `join` replaces `destination` entirely when `member_path` is absolute,
    // which is exactly what an unguarded extraction would do.
    let target = normalize(&destination.join(member_path));

    match target.strip_prefix(&root) {
        // With an empty or all-`..` root the prefix strips trivially, so a
        // remainder that still starts with `..` has escaped the destination.
        Ok(rest) => !matches!(rest.components().next(), Some(Component::ParentDir)),
        Err(_) => false,
    }
}
fn sanitize_filename(filename: &str) -> String {
let filename = filename.replace("\x00", "");
let filename = filename.replace("/", "_").replace("\\", "_").trim().to_string();
if filename.is_empty() || filename == "." || filename == ".." {
return "_".to_string();
}
filename
}
/// Extracts `archive` into `destination`, choosing the extractor from the
/// archive's file name.
///
/// Recognised names: `*.zip`, `*.tar`, `*.tar.gz` / `*.tgz` and
/// `*.tar.bz2` / `*.tbz`. Unless `quiet` is set, a one-line summary is printed
/// to stdout after a successful extraction.
///
/// # Errors
///
/// Returns `GdownError::Extraction` when the extension is not one of the
/// formats above, and propagates any error reported by the selected extractor.
pub fn extractall(archive: &Path, destination: &Path, quiet: bool) -> Result<Vec<PathBuf>> {
    let extension = archive.extension().and_then(|e| e.to_str()).unwrap_or("");
    let stem = archive.file_stem().and_then(|s| s.to_str()).unwrap_or("");

    // `Path::extension` only yields the last segment, so `x.tar.gz` arrives
    // here as `gz` with a stem of `x.tar`; fold that back into one format tag.
    let is_tarball = stem.ends_with(".tar");
    let full_ext = match extension {
        "gz" if is_tarball => "tar.gz",
        "bz2" if is_tarball => "tar.bz2",
        other => other,
    };

    // The short forms `tgz` / `tbz` carry no `.tar` in their stem, so they are
    // matched unconditionally rather than behind a stem guard.
    let extracted = match full_ext {
        "zip" => extract_zip(archive, destination),
        "tar" => extract_tar(archive, destination),
        "tar.gz" | "tgz" => extract_tar_gz(archive, destination),
        "tar.bz2" | "tbz" => extract_tar_bz2(archive, destination),
        _ => Err(GdownError::Extraction(format!("Unsupported archive format: {}", full_ext))),
    }?;

    if !quiet {
        println!("Extracted {} files to {:?}", extracted.len(), destination);
    }
    Ok(extracted)
}
/// Extracts every entry of the zip file at `archive` below `destination`,
/// preserving the archive's directory layout.
///
/// Each path component of an entry name is passed through
/// `sanitize_filename`; entries whose name is empty, absolute, or climbs out
/// of `destination` via `..` are skipped. On Unix, the mode recorded in the
/// archive is applied to extracted files.
///
/// Returns the paths of the regular files that were written (directories are
/// created but not listed).
///
/// # Errors
///
/// Returns `GdownError::Io` if the archive cannot be opened,
/// `GdownError::Extraction` if it cannot be parsed, and propagates I/O errors
/// raised while creating directories or writing files.
fn extract_zip(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
    use std::path::Component;

    // Rebuilds a member name as a relative path made only of sanitized plain
    // components. Returns `None` when the name is rooted, escapes upwards
    // through `..`, or resolves to nothing.
    fn relative_target(member: &Path) -> Option<PathBuf> {
        let mut relative = PathBuf::new();
        for component in member.components() {
            match component {
                Component::Normal(part) => {
                    relative.push(sanitize_filename(&part.to_string_lossy()));
                }
                Component::CurDir => {}
                Component::ParentDir => {
                    if !relative.pop() {
                        return None;
                    }
                }
                Component::RootDir | Component::Prefix(_) => return None,
            }
        }
        if relative.as_os_str().is_empty() {
            None
        } else {
            Some(relative)
        }
    }

    let file = File::open(archive).map_err(GdownError::Io)?;
    let mut zip = ZipArchive::new(file).map_err(|e| GdownError::Extraction(e.to_string()))?;
    let mut extracted = Vec::new();

    for index in 0..zip.len() {
        let mut entry = zip
            .by_index(index)
            .map_err(|e| GdownError::Extraction(e.to_string()))?;

        // Check the member name itself, as the tar extractors do, instead of
        // a path that has already been joined onto `destination`.
        let member = PathBuf::from(entry.name());
        if !is_safe_path(destination, &member) {
            continue;
        }
        let relative = match relative_target(&member) {
            Some(relative) => relative,
            None => continue,
        };
        let final_path = destination.join(relative);

        if entry.is_dir() {
            // Directories keep default permissions: applying a read-only mode
            // from the archive here would block writing their children.
            std::fs::create_dir_all(&final_path)?;
            continue;
        }

        if let Some(parent) = final_path.parent() {
            std::fs::create_dir_all(parent)?;
        }
        {
            let mut outfile = File::create(&final_path)?;
            std::io::copy(&mut entry, &mut outfile)?;
        }

        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            if let Some(mode) = entry.unix_mode() {
                std::fs::set_permissions(&final_path, std::fs::Permissions::from_mode(mode))?;
            }
        }

        extracted.push(final_path);
    }
    Ok(extracted)
}
fn extract_tar(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
let file = File::open(archive).map_err(GdownError::Io)?;
let mut tar = TarArchive::new(file);
let mut extracted = Vec::new();
for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
if !is_safe_path(destination, &path) {
continue;
}
let entry_type = entry.header().entry_type();
if entry_type.is_symlink() || entry_type.is_hard_link() {
continue;
}
let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
let final_path = destination.join(&sanitized_name);
if entry_type.is_dir() {
std::fs::create_dir_all(&final_path)?;
} else {
if let Some(parent) = final_path.parent() {
std::fs::create_dir_all(parent)?;
}
entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
extracted.push(final_path);
}
}
Ok(extracted)
}
fn extract_tar_gz(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
let file = File::open(archive).map_err(GdownError::Io)?;
let decoder = GzDecoder::new(file);
let mut tar = TarArchive::new(decoder);
let mut extracted = Vec::new();
for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
if !is_safe_path(destination, &path) {
continue;
}
let entry_type = entry.header().entry_type();
if entry_type.is_symlink() || entry_type.is_hard_link() {
continue;
}
let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
let final_path = destination.join(&sanitized_name);
if entry_type.is_dir() {
std::fs::create_dir_all(&final_path)?;
} else {
if let Some(parent) = final_path.parent() {
std::fs::create_dir_all(parent)?;
}
entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
extracted.push(final_path);
}
}
Ok(extracted)
}
fn extract_tar_bz2(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
let file = File::open(archive).map_err(GdownError::Io)?;
let decoder = bzip2::read::BzDecoder::new(file);
let mut tar = TarArchive::new(decoder);
let mut extracted = Vec::new();
for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
if !is_safe_path(destination, &path) {
continue;
}
let entry_type = entry.header().entry_type();
if entry_type.is_symlink() || entry_type.is_hard_link() {
continue;
}
let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
let final_path = destination.join(&sanitized_name);
if entry_type.is_dir() {
std::fs::create_dir_all(&final_path)?;
} else {
if let Some(parent) = final_path.parent() {
std::fs::create_dir_all(parent)?;
}
entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
extracted.push(final_path);
}
}
Ok(extracted)
}
/// Returns `true` when the file name of `path` looks like a supported archive.
///
/// A path qualifies if its final extension is listed in
/// `SUPPORTED_EXTENSIONS`, or if it ends in `.tar.gz` / `.tar.bz2`. Only the
/// name is inspected; the file is never opened. Names whose extension or stem
/// is not valid UTF-8 are treated as having none.
pub fn is_archive(path: &Path) -> bool {
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    if SUPPORTED_EXTENSIONS.contains(&ext) {
        return true;
    }

    // Compressed tarballs: the extension is only `gz` / `bz2`, so the `.tar`
    // part has to be looked for at the end of the stem.
    if !matches!(ext, "gz" | "bz2") {
        return false;
    }
    path.file_stem()
        .and_then(|s| s.to_str())
        .map_or(false, |stem| stem.ends_with(".tar"))
}
#[cfg(test)]
mod tests {
    use super::*;

    // The path fixtures below are chosen per platform: a Windows-style path is
    // a single opaque component on Unix (backslash is not a separator there),
    // which would make the negative `is_safe_path` assertions meaningless.

    /// Absolute directory used as the extraction destination.
    fn extraction_root() -> PathBuf {
        if cfg!(windows) {
            PathBuf::from("C:\\tmp\\extract")
        } else {
            PathBuf::from("/tmp/extract")
        }
    }

    /// Absolute path that leaves the destination's parent through `..`.
    fn traversal_path() -> PathBuf {
        if cfg!(windows) {
            PathBuf::from("C:\\tmp\\..\\..\\Windows\\System32")
        } else {
            PathBuf::from("/tmp/../../etc/passwd")
        }
    }

    /// Absolute path located entirely outside the destination.
    fn outside_path() -> PathBuf {
        if cfg!(windows) {
            PathBuf::from("C:\\Windows\\system32\\file.txt")
        } else {
            PathBuf::from("/etc/passwd")
        }
    }

    #[test]
    fn test_is_safe_path() {
        let dest = extraction_root();
        let safe = dest.join("nested").join("file.txt");
        assert!(is_safe_path(&dest, &safe));
    }

    #[test]
    fn test_sanitize_filename() {
        assert_eq!(sanitize_filename("..根目录"), "..根目录");
        assert_eq!(sanitize_filename(".."), "_");
        assert_eq!(sanitize_filename("."), "_");
        assert_eq!(sanitize_filename(""), "_");
        assert_eq!(sanitize_filename("normal.txt"), "normal.txt");
        assert_eq!(sanitize_filename("Budget/2024.pdf"), "Budget_2024.pdf");
        assert_eq!(sanitize_filename("path\\to\\file.pdf"), "path_to_file.pdf");
        assert_eq!(sanitize_filename("file\x00name.txt"), "filename.txt");
        assert_eq!(sanitize_filename("name/with/slashes.txt"), "name_with_slashes.txt");
        assert_eq!(sanitize_filename("name\\with\\backslashes.txt"), "name_with_backslashes.txt");
        assert_eq!(sanitize_filename("\x00nullbyte"), "nullbyte");
        assert_eq!(sanitize_filename(" file.txt "), "file.txt");
        assert_eq!(sanitize_filename("/leading slash"), "_leading slash");
        assert_eq!(sanitize_filename("trailing slash/"), "trailing slash_");
        assert_eq!(sanitize_filename("multiple///slashes"), "multiple___slashes");
    }

    #[test]
    fn test_is_safe_path_traversal() {
        let dest = extraction_root();
        assert!(!is_safe_path(&dest, &traversal_path()));
    }

    #[test]
    fn test_is_safe_path_absolute() {
        let dest = extraction_root();
        assert!(!is_safe_path(&dest, &outside_path()));
    }

    #[test]
    fn test_is_safe_path_same_file() {
        let dest = extraction_root();
        let same = extraction_root();
        assert!(is_safe_path(&dest, &same));
    }

    #[test]
    fn test_is_safe_path_parent_in_name() {
        // A component that merely starts with ".." is an ordinary name.
        let dest = extraction_root();
        let parent_in_name = dest.join("..hidden").join("file.txt");
        assert!(is_safe_path(&dest, &parent_in_name));
    }

    #[test]
    fn test_is_archive() {
        assert!(is_archive(Path::new("file.zip")));
        assert!(is_archive(Path::new("file.tar")));
        assert!(is_archive(Path::new("file.tar.gz")));
        assert!(is_archive(Path::new("file.tgz")));
        assert!(is_archive(Path::new("file.tar.bz2")));
        assert!(is_archive(Path::new("file.tbz")));
        assert!(!is_archive(Path::new("file.txt")));
        assert!(!is_archive(Path::new("file.gz")));
    }
}