use crate::sorting::{FileTypeUnion, hash_depth};
use std::collections::HashMap;
use std::path::Path;
use anyhow::{Result, ensure};
use indicatif::{ProgressBar, ProgressStyle};
use sha2::{Digest, Sha256};
/// Extracts the entries of a Zip archive into `destination`, grouped by file type and hash.
///
/// Every non-directory entry is read fully into memory and hashed with SHA-256. An entry that
/// passes the active filter is written to
/// `<destination>/<type label>/<hash_depth(hash, depth)>/<hex SHA-256>`; the entry's original
/// name inside the archive is not used. Entries with identical contents map to the same path,
/// so a later one overwrites the earlier one (each write is still counted).
///
/// Entries that cannot be opened, decrypted or read are reported on stderr and skipped. A
/// progress bar sized to the number of archive entries is advanced once per entry, whether
/// the entry was extracted, filtered out or skipped.
///
/// # Arguments
///
/// * `source` - Path of the Zip archive; must be an existing file.
/// * `destination` - Existing directory that receives the extracted files.
/// * `password` - When `Some`, every entry is opened with this password.
/// * `depth` - Forwarded to [`hash_depth`] to build the hash-derived sub-directory.
/// * `file_type` - When `Some`, only entries for which `FileTypeUnion::matches` accepts the
///   contents are extracted, and the type's string form names the directory.
/// * `file_cmd` - Only with the `libmagic` feature. When `Some`, it takes precedence over
///   `file_type`: an entry is extracted when libmagic's description of its contents contains
///   this text (both lowercased), and the lowercased text names the directory.
/// * `keep_unknowns` - When no filter is given, whether entries whose detected type is
///   unknown are extracted as well.
///
/// # Returns
///
/// The number of files written.
///
/// # Errors
///
/// Fails when `source` is not a file, `destination` is not a directory, the archive cannot be
/// opened, a destination directory or file cannot be written, or (with the `libmagic`
/// feature) libmagic cannot be initialised or cannot describe an entry.
// The feature-gated selection paths keep this function close to clippy's length limit.
#[allow(clippy::too_many_lines)]
pub fn unzip_files_by_type<P: AsRef<Path>>(
    source: P,
    destination: P,
    password: Option<&String>,
    depth: u8,
    file_type: Option<FileTypeUnion>,
    #[cfg(feature = "libmagic")] file_cmd: Option<&str>,
    keep_unknowns: bool,
) -> Result<usize> {
    ensure!(source.as_ref().is_file(), "Source must be a file");
    ensure!(
        destination.as_ref().is_dir(),
        "Destination must be a directory"
    );
    let destination = destination.as_ref();

    #[cfg(feature = "libmagic")]
    let cookie = {
        let cookie = magic::Cookie::open(magic::cookie::Flags::ERROR)?;
        let database = magic::cookie::DatabasePaths::default();
        cookie
            .load(&database)
            .map_err(|e| anyhow::anyhow!("Failed to load magic database: {e}"))?
    };
    // Loop-invariant: lowercase the search text once instead of once per entry.
    #[cfg(feature = "libmagic")]
    let file_cmd = file_cmd.map(str::to_lowercase);

    let mut extracted_files = 0;
    let file = std::fs::File::open(source)?;
    let mut archive = zip::ZipArchive::new(file)?;
    let pb = progress_bar_with_eta(archive.len() as u64);

    for i in 0..archive.len() {
        let opened = match password {
            Some(password) => archive.by_index_decrypt(i, password.as_bytes()),
            None => archive.by_index(i),
        };
        let mut entry = match opened {
            Ok(entry) => entry,
            Err(e) => {
                // Report failures for encrypted and plain entries alike, then move on.
                eprintln!("ZipError: {e}");
                pb.inc(1);
                continue;
            }
        };

        // Directory entries carry no data; they still count towards the progress total.
        if entry.name().ends_with('/') {
            pb.inc(1);
            continue;
        }

        let mut contents = Vec::new();
        if let Err(e) = std::io::copy(&mut entry, &mut contents) {
            eprintln!("ZipError: {e}");
            pb.inc(1);
            continue;
        }
        let hash = hex::encode(Sha256::digest(&contents));

        // Type-based selection shared by both feature configurations. Yields the
        // `<destination>/<type label>` directory, or `None` when the entry is filtered out.
        let directory_by_type = || {
            let mut dir = destination.to_path_buf();
            if let Some(wanted) = &file_type {
                if !wanted.matches(&contents) {
                    return None;
                }
                dir.push(wanted.to_string());
            } else {
                let detected = FileTypeUnion::from_bytes(&contents);
                if !keep_unknowns && detected.is_unknown() {
                    return None;
                }
                dir.push(detected.to_string());
            }
            Some(dir)
        };

        #[cfg(not(feature = "libmagic"))]
        let selected = directory_by_type();
        #[cfg(feature = "libmagic")]
        let selected = if let Some(file_cmd) = &file_cmd {
            let description = cookie.buffer(&contents)?.to_lowercase();
            description
                .contains(file_cmd.as_str())
                .then(|| destination.join(file_cmd))
        } else {
            directory_by_type()
        };

        let Some(mut destination_path) = selected else {
            pb.inc(1);
            continue;
        };
        destination_path.push(hash_depth(&hash, depth));

        if let Err(e) = std::fs::create_dir_all(&destination_path) {
            eprintln!(
                "ZipError creating directories {}: {e}",
                destination_path.display()
            );
            return Err(e.into());
        }

        // The file itself is named after the full hex digest.
        destination_path.push(hash);
        if let Err(e) = std::fs::write(&destination_path, contents) {
            eprintln!(
                "ZipError writing file {}: {e}",
                destination_path.display()
            );
            return Err(e.into());
        }

        extracted_files += 1;
        pb.inc(1);
    }

    pb.finish_and_clear();
    Ok(extracted_files)
}
/// Summary of the contents of a Zip archive, as returned by [`zip_file_type_counts`].
pub struct ZipSummaryDetails {
/// Number of entries seen for each detected file type.
pub file_type_counts: HashMap<FileTypeUnion, usize>,
/// For entries of unknown type: how many entries start with each leading byte sequence.
/// Only filled in when the caller asked for a non-zero number of leading bytes.
#[cfg(not(feature = "libmagic"))]
pub unknown_magic_counts: HashMap<Vec<u8>, usize>,
/// For entries of unknown type, keyed by leading byte sequence: the number of entries with
/// that prefix, and libmagic's description of the first entry seen with it.
/// Only filled in when the caller asked for a non-zero number of leading bytes.
#[cfg(feature = "libmagic")]
pub unknown_magic_counts: HashMap<Vec<u8>, (usize, String)>,
/// Number of archive entries whose contents were read and classified.
pub total_files: usize,
}
pub fn zip_file_type_counts<P: AsRef<Path>>(
source: P,
password: Option<&String>,
unknown_magic: usize,
) -> Result<ZipSummaryDetails> {
ensure!(source.as_ref().is_file(), "Source must be a file");
#[cfg(feature = "libmagic")]
let (cookie, mut unknowns) = {
let cookie = magic::Cookie::open(magic::cookie::Flags::ERROR)?;
let database = &magic::cookie::DatabasePaths::default();
let cookie = cookie
.load(database)
.map_err(|e| anyhow::anyhow!("Failed to load magic database: {e}"))?;
(cookie, HashMap::<Vec<u8>, (usize, String)>::new())
};
#[cfg(not(feature = "libmagic"))]
let mut unknowns = HashMap::new();
let mut summary = HashMap::new();
let mut total_files = 0;
let file = std::fs::File::open(source)?;
let mut archive = zip::ZipArchive::new(file)?;
let pb = progress_bar_with_eta(archive.len() as u64);
for i in 0..archive.len() {
let mut file = if let Some(password) = password {
let Ok(f) = archive.by_index_decrypt(i, password.as_bytes()) else {
continue;
};
f
} else {
match archive.by_index(i) {
Ok(f) => f,
Err(e) => {
eprintln!("ZipError: {e}");
continue;
}
}
};
if (*file.name()).ends_with('/') {
continue;
}
let mut contents = Vec::new();
if let Err(e) = std::io::copy(&mut file, &mut contents) {
eprintln!("ZipError: {e}");
continue;
}
let this_type = FileTypeUnion::from_bytes(&contents);
summary
.entry(this_type)
.and_modify(|e| *e += 1)
.or_insert(1);
if this_type.is_unknown() && unknown_magic > 0 {
let first_bytes = contents
.iter()
.take(unknown_magic)
.copied()
.collect::<Vec<_>>();
#[cfg(not(feature = "libmagic"))]
unknowns
.entry(first_bytes)
.and_modify(|e| *e += 1)
.or_insert(1);
#[cfg(feature = "libmagic")]
{
if let Some(entry) = unknowns.get_mut(&first_bytes) {
entry.0 += 1;
} else {
let result = cookie.buffer(&contents)?;
unknowns.insert(first_bytes, (1, result));
}
}
}
total_files += 1;
pb.inc(1);
}
pb.finish_and_clear();
Ok(ZipSummaryDetails {
file_type_counts: summary,
unknown_magic_counts: unknowns,
total_files,
})
}
/// Creates a progress bar for `len` items, rendered with the template
/// `{wide_bar} {pos}/{len} {eta}`.
///
/// # Panics
///
/// Panics if `indicatif` rejects the template string, which is a fixed literal.
fn progress_bar_with_eta(len: u64) -> ProgressBar {
    let bar = ProgressBar::new(len);
    let style = ProgressStyle::with_template("{wide_bar} {pos}/{len} {eta}").unwrap();
    bar.with_style(style)
}