1use crate::sorting::{FileTypeUnion, hash_depth};
7
8use std::collections::HashMap;
9use std::path::Path;
10
11use anyhow::{Result, ensure};
12use indicatif::{ProgressBar, ProgressStyle};
13use sha2::{Digest, Sha256};
14
15#[allow(clippy::too_many_lines)]
22pub fn unzip_files_by_type<P: AsRef<Path>>(
23 source: P,
24 destination: P,
25 password: Option<&String>,
26 depth: u8,
27 file_type: Option<FileTypeUnion>,
28 #[cfg(feature = "libmagic")] file_cmd: Option<&str>,
29 keep_unknowns: bool,
30) -> Result<usize> {
31 ensure!(source.as_ref().is_file(), "Source must be a file");
32 ensure!(
33 destination.as_ref().is_dir(),
34 "Destination must be a directory"
35 );
36
37 #[cfg(feature = "libmagic")]
38 let cookie = {
39 let cookie = magic::Cookie::open(magic::cookie::Flags::ERROR)?;
40 let database = magic::cookie::DatabasePaths::default();
41 cookie
42 .load(&database)
43 .map_err(|e| anyhow::anyhow!("Failed to load magic database: {e}"))?
44 };
45
46 let mut extracted_files = 0;
47 let file = std::fs::File::open(source)?;
48 let mut archive = zip::ZipArchive::new(file)?;
49 let pb = progress_bar_with_eta(archive.len() as u64);
50 for i in 0..archive.len() {
51 let mut file = if let Some(password) = password {
52 let Ok(f) = archive.by_index_decrypt(i, password.as_bytes()) else {
53 continue;
54 };
55 f
56 } else {
57 match archive.by_index(i) {
58 Ok(f) => f,
59 Err(e) => {
60 eprintln!("ZipError: {e}");
61 continue;
62 }
63 }
64 };
65
66 if (*file.name()).ends_with('/') {
67 continue;
68 }
69
70 let mut contents = Vec::new();
71 if let Err(e) = std::io::copy(&mut file, &mut contents) {
72 eprintln!("ZipError: {e}");
73 continue;
74 }
75
76 let hash = hex::encode(Sha256::digest(&contents));
77 #[cfg(not(feature = "libmagic"))]
78 let mut destination_directory = if let Some(file_type) = &file_type {
79 if file_type.matches(&contents) {
80 let mut dest = destination.as_ref().to_owned();
81 dest.push(file_type.to_string());
82 dest.push(hash_depth(&hash, depth));
83 dest
84 } else {
85 pb.inc(1);
86 continue;
87 }
88 } else {
89 let this_type = FileTypeUnion::from_bytes(&contents);
90 if !keep_unknowns && this_type.is_unknown() {
91 pb.inc(1);
92 continue;
93 }
94
95 let mut dest = destination.as_ref().to_owned();
96 dest.push(this_type.to_string());
97 dest.push(hash_depth(&hash, depth));
98 dest
99 };
100
101 #[cfg(feature = "libmagic")]
102 let mut destination_directory = if let Some(file_cmd) = file_cmd {
103 let file_cmd = file_cmd.to_lowercase();
104 let result = cookie.buffer(&contents)?.to_lowercase();
105 if result.contains(&file_cmd) {
106 let mut dest = destination.as_ref().to_owned();
107 dest.push(file_cmd);
108 dest.push(hash_depth(&hash, depth));
109 dest
110 } else {
111 pb.inc(1);
112 continue;
113 }
114 } else if let Some(file_type) = &file_type {
115 if file_type.matches(&contents) {
116 let mut dest = destination.as_ref().to_owned();
117 dest.push(file_type.to_string());
118 dest.push(hash_depth(&hash, depth));
119 dest
120 } else {
121 pb.inc(1);
122 continue;
123 }
124 } else {
125 let this_type = FileTypeUnion::from_bytes(&contents);
126 if !keep_unknowns && this_type.is_unknown() {
127 pb.inc(1);
128 continue;
129 }
130
131 let mut dest = destination.as_ref().to_owned();
132 dest.push(this_type.to_string());
133 dest.push(hash_depth(&hash, depth));
134 dest
135 };
136
137 if let Err(e) = std::fs::create_dir_all(&destination_directory) {
138 eprintln!(
139 "ZipError creating directories {}: {e}",
140 destination_directory.display()
141 );
142 return Err(e.into());
143 }
144 destination_directory.push(hash);
145 if let Err(e) = std::fs::write(&destination_directory, contents) {
146 eprintln!(
147 "ZipError writing file {}: {e}",
148 destination_directory.display()
149 );
150 return Err(e.into());
151 }
152
153 extracted_files += 1;
154 pb.inc(1);
155 }
156 pb.finish_and_clear();
157
158 Ok(extracted_files)
159}
160
/// Summary of an archive's contents, as returned by [`zip_file_type_counts`].
pub struct ZipSummaryDetails {
    /// Number of entries counted for each file type detected from the entry contents.
    pub file_type_counts: HashMap<FileTypeUnion, usize>,

    /// For entries of unknown type: how many entries share each leading byte sequence.
    /// The key holds at most the number of leading bytes requested by the caller.
    #[cfg(not(feature = "libmagic"))]
    pub unknown_magic_counts: HashMap<Vec<u8>, usize>,

    /// For entries of unknown type: how many entries share each leading byte sequence,
    /// paired with the libmagic description of the first entry seen with that prefix.
    /// The key holds at most the number of leading bytes requested by the caller.
    #[cfg(feature = "libmagic")]
    pub unknown_magic_counts: HashMap<Vec<u8>, (usize, String)>,

    /// Number of entries that were read and counted.
    pub total_files: usize,
}
177
178pub fn zip_file_type_counts<P: AsRef<Path>>(
185 source: P,
186 password: Option<&String>,
187 unknown_magic: usize,
188) -> Result<ZipSummaryDetails> {
189 ensure!(source.as_ref().is_file(), "Source must be a file");
190
191 #[cfg(feature = "libmagic")]
192 let (cookie, mut unknowns) = {
193 let cookie = magic::Cookie::open(magic::cookie::Flags::ERROR)?;
194 let database = &magic::cookie::DatabasePaths::default();
195 let cookie = cookie
196 .load(database)
197 .map_err(|e| anyhow::anyhow!("Failed to load magic database: {e}"))?;
198 (cookie, HashMap::<Vec<u8>, (usize, String)>::new())
199 };
200
201 #[cfg(not(feature = "libmagic"))]
202 let mut unknowns = HashMap::new();
203
204 let mut summary = HashMap::new();
205 let mut total_files = 0;
206 let file = std::fs::File::open(source)?;
207 let mut archive = zip::ZipArchive::new(file)?;
208 let pb = progress_bar_with_eta(archive.len() as u64);
209 for i in 0..archive.len() {
210 let mut file = if let Some(password) = password {
211 let Ok(f) = archive.by_index_decrypt(i, password.as_bytes()) else {
212 continue;
213 };
214 f
215 } else {
216 match archive.by_index(i) {
217 Ok(f) => f,
218 Err(e) => {
219 eprintln!("ZipError: {e}");
220 continue;
221 }
222 }
223 };
224
225 if (*file.name()).ends_with('/') {
226 continue;
227 }
228
229 let mut contents = Vec::new();
230 if let Err(e) = std::io::copy(&mut file, &mut contents) {
231 eprintln!("ZipError: {e}");
232 continue;
233 }
234
235 let this_type = FileTypeUnion::from_bytes(&contents);
236 summary
237 .entry(this_type)
238 .and_modify(|e| *e += 1)
239 .or_insert(1);
240
241 if this_type.is_unknown() && unknown_magic > 0 {
242 let first_bytes = contents
243 .iter()
244 .take(unknown_magic)
245 .copied()
246 .collect::<Vec<_>>();
247
248 #[cfg(not(feature = "libmagic"))]
249 unknowns
250 .entry(first_bytes)
251 .and_modify(|e| *e += 1)
252 .or_insert(1);
253
254 #[cfg(feature = "libmagic")]
255 {
256 if let Some(entry) = unknowns.get_mut(&first_bytes) {
257 entry.0 += 1;
258 } else {
259 let result = cookie.buffer(&contents)?;
260 unknowns.insert(first_bytes, (1, result));
261 }
262 }
263 }
264
265 total_files += 1;
266 pb.inc(1);
267 }
268 pb.finish_and_clear();
269
270 Ok(ZipSummaryDetails {
271 file_type_counts: summary,
272 unknown_magic_counts: unknowns,
273 total_files,
274 })
275}
276
277fn progress_bar_with_eta(len: u64) -> ProgressBar {
279 ProgressBar::new(len)
280 .with_style(ProgressStyle::with_template("{wide_bar} {pos}/{len} {eta}").unwrap())
281}