1use crate::error::{GdownError, Result};
4use flate2::read::GzDecoder;
5use std::fs::File;
6use std::path::{Path, PathBuf};
7use tar::Archive as TarArchive;
8use zip::ZipArchive;
9
/// Archive extensions recognised by this module.
///
/// `is_archive` compares the value of `Path::extension()` against this list.
/// That method only yields the text after the final dot, so the compound
/// entries (`tar.gz`, `tar.bz2`) never match through this list; `is_archive`
/// recognises those names with a separate check on the file stem.
const SUPPORTED_EXTENSIONS: &[&str] = &[
    "zip", "tar", "tar.gz", "tgz", "tar.bz2", "tbz",
];
14
/// Reports whether writing `member_path` would stay inside `destination`.
///
/// The check is purely lexical: the filesystem is never consulted and symlinks
/// are not resolved.
///
/// * A path that carries a root or a drive prefix is accepted only when it is
///   located under `destination`; the remainder after `destination` is then
///   validated like a relative path.
/// * Any other path is interpreted relative to `destination`.
/// * `..` components are resolved, not ignored: as soon as one of them would
///   climb above `destination` the path is rejected. (Dropping `..` silently
///   would make `../../etc/passwd` look harmless.)
///
/// A path equal to `destination` itself is considered safe.
fn is_safe_path(destination: &Path, member_path: &Path) -> bool {
    use std::path::Component;

    // "Anchored" paths ignore the directory they are joined onto, so they must
    // already point below the destination.
    let anchored = member_path.has_root()
        || matches!(member_path.components().next(), Some(Component::Prefix(_)));
    let relative = if anchored {
        match member_path.strip_prefix(destination) {
            Ok(rest) => rest,
            Err(_) => return false,
        }
    } else {
        member_path
    };

    // Number of directory levels we currently are below `destination`.
    let mut depth = 0usize;
    for component in relative.components() {
        match component {
            Component::Normal(_) => depth += 1,
            Component::CurDir => {}
            Component::ParentDir => match depth.checked_sub(1) {
                Some(shallower) => depth = shallower,
                // Would step out of the destination directory.
                None => return false,
            },
            // A root or prefix in the middle of a relative path re-anchors it.
            Component::Prefix(_) | Component::RootDir => return false,
        }
    }
    true
}
25
/// Turns an arbitrary name into a single, harmless file name.
///
/// NUL characters are removed, both `/` and `\` become `_`, and surrounding
/// whitespace is trimmed. If what remains is empty, `.` or `..`, the
/// placeholder `_` is returned instead.
fn sanitize_filename(filename: &str) -> String {
    let cleaned: String = filename
        .chars()
        .filter(|&ch| ch != '\0')
        .map(|ch| if matches!(ch, '/' | '\\') { '_' } else { ch })
        .collect();

    match cleaned.trim() {
        "" | "." | ".." => String::from("_"),
        usable => usable.to_owned(),
    }
}
41
42pub fn extractall(archive: &Path, destination: &Path, quiet: bool) -> Result<Vec<PathBuf>> {
44 let extension = archive
45 .extension()
46 .and_then(|e| e.to_str())
47 .unwrap_or("");
48
49 let stem = archive.file_stem().and_then(|s| s.to_str()).unwrap_or("");
50
51 let full_ext = if stem.ends_with(".tar") && extension == "gz" {
52 "tar.gz"
53 } else if stem.ends_with(".tar") && extension == "bz2" {
54 "tar.bz2"
55 } else {
56 extension
57 };
58
59 let extracted = match full_ext {
60 "zip" => extract_zip(archive, destination),
61 "tar" => extract_tar(archive, destination),
62 "gz" | "tgz" if stem.ends_with(".tar") => extract_tar_gz(archive, destination),
63 "tar.gz" => extract_tar_gz(archive, destination),
64 "bz2" | "tbz" if stem.ends_with(".tar") => extract_tar_bz2(archive, destination),
65 "tar.bz2" => extract_tar_bz2(archive, destination),
66 _ => Err(GdownError::Extraction(format!("Unsupported archive format: {}", full_ext))),
67 }?;
68
69 if !quiet {
70 println!("Extracted {} files to {:?}", extracted.len(), destination);
71 }
72
73 Ok(extracted)
74}
75
76fn extract_zip(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
78 let file = File::open(archive).map_err(GdownError::Io)?;
79 let mut zip = ZipArchive::new(file).map_err(|e| GdownError::Extraction(e.to_string()))?;
80 let mut extracted = Vec::new();
81
82 for i in 0..zip.len() {
83 let mut file = zip.by_index(i).map_err(|e| GdownError::Extraction(e.to_string()))?;
84 let outpath = destination.join(file.name());
85
86 if !is_safe_path(destination, &outpath) {
87 continue;
88 }
89
90 let sanitized_name = sanitize_filename(file.name());
91 let final_path = destination.join(&sanitized_name);
92
93 if file.is_dir() {
94 std::fs::create_dir_all(&final_path)?;
95 } else {
96 if let Some(parent) = final_path.parent() {
97 std::fs::create_dir_all(parent)?;
98 }
99
100 let mut outfile = File::create(&final_path)?;
101 std::io::copy(&mut file, &mut outfile)?;
102 extracted.push(final_path);
103 }
104
105 #[cfg(unix)]
106 {
107 use std::os::unix::fs::PermissionsExt;
108 if let Some(mode) = file.unix_mode() {
109 std::fs::set_permissions(&final_path, std::fs::Permissions::from_mode(mode))?;
110 }
111 }
112 }
113
114 Ok(extracted)
115}
116
117fn extract_tar(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
119 let file = File::open(archive).map_err(GdownError::Io)?;
120 let mut tar = TarArchive::new(file);
121 let mut extracted = Vec::new();
122
123 for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
124 let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
125 let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
126
127 if !is_safe_path(destination, &path) {
128 continue;
129 }
130
131 let entry_type = entry.header().entry_type();
132 if entry_type.is_symlink() || entry_type.is_hard_link() {
133 continue;
134 }
135
136 let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
137 let final_path = destination.join(&sanitized_name);
138
139 if entry_type.is_dir() {
140 std::fs::create_dir_all(&final_path)?;
141 } else {
142 if let Some(parent) = final_path.parent() {
143 std::fs::create_dir_all(parent)?;
144 }
145 entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
146 extracted.push(final_path);
147 }
148 }
149
150 Ok(extracted)
151}
152
153fn extract_tar_gz(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
155 let file = File::open(archive).map_err(GdownError::Io)?;
156 let decoder = GzDecoder::new(file);
157 let mut tar = TarArchive::new(decoder);
158 let mut extracted = Vec::new();
159
160 for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
161 let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
162 let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
163
164 if !is_safe_path(destination, &path) {
165 continue;
166 }
167
168 let entry_type = entry.header().entry_type();
169 if entry_type.is_symlink() || entry_type.is_hard_link() {
170 continue;
171 }
172
173 let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
174 let final_path = destination.join(&sanitized_name);
175
176 if entry_type.is_dir() {
177 std::fs::create_dir_all(&final_path)?;
178 } else {
179 if let Some(parent) = final_path.parent() {
180 std::fs::create_dir_all(parent)?;
181 }
182 entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
183 extracted.push(final_path);
184 }
185 }
186
187 Ok(extracted)
188}
189
190fn extract_tar_bz2(archive: &Path, destination: &Path) -> Result<Vec<PathBuf>> {
192 let file = File::open(archive).map_err(GdownError::Io)?;
193 let decoder = bzip2::read::BzDecoder::new(file);
194 let mut tar = TarArchive::new(decoder);
195 let mut extracted = Vec::new();
196
197 for entry in tar.entries().map_err(|e| GdownError::Extraction(e.to_string()))? {
198 let mut entry = entry.map_err(|e| GdownError::Extraction(e.to_string()))?;
199 let path = entry.path().map_err(|e| GdownError::Extraction(e.to_string()))?.into_owned();
200
201 if !is_safe_path(destination, &path) {
202 continue;
203 }
204
205 let entry_type = entry.header().entry_type();
206 if entry_type.is_symlink() || entry_type.is_hard_link() {
207 continue;
208 }
209
210 let sanitized_name = sanitize_filename(path.to_str().unwrap_or(""));
211 let final_path = destination.join(&sanitized_name);
212
213 if entry_type.is_dir() {
214 std::fs::create_dir_all(&final_path)?;
215 } else {
216 if let Some(parent) = final_path.parent() {
217 std::fs::create_dir_all(parent)?;
218 }
219 entry.unpack(&final_path).map_err(|e| GdownError::Extraction(e.to_string()))?;
220 extracted.push(final_path);
221 }
222 }
223
224 Ok(extracted)
225}
226
227pub fn is_archive(path: &Path) -> bool {
229 let extension = path
230 .extension()
231 .and_then(|e| e.to_str())
232 .unwrap_or("");
233
234 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
235
236 SUPPORTED_EXTENSIONS.contains(&extension)
237 || (stem.ends_with(".tar") && (extension == "gz" || extension == "bz2"))
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute-path fixtures for the `is_safe_path` tests.
    ///
    /// Drive-letter paths are only parsed as absolute on Windows; elsewhere
    /// they are a single odd file name, which made the "unsafe" assertions
    /// fail. Each platform therefore gets fixtures in its own path syntax.
    #[cfg(windows)]
    mod fixtures {
        pub const DEST: &str = "C:\\tmp\\extract";
        pub const NESTED: &str = "C:\\tmp\\extract\\nested\\file.txt";
        pub const TRAVERSAL: &str = "C:\\tmp\\..\\..\\Windows\\System32";
        pub const OUTSIDE: &str = "C:\\Windows\\system32\\file.txt";
        pub const DOTTED: &str = "C:\\tmp\\extract\\..hidden\\file.txt";
    }

    #[cfg(not(windows))]
    mod fixtures {
        pub const DEST: &str = "/tmp/extract";
        pub const NESTED: &str = "/tmp/extract/nested/file.txt";
        pub const TRAVERSAL: &str = "/tmp/../../etc/passwd";
        pub const OUTSIDE: &str = "/etc/passwd";
        pub const DOTTED: &str = "/tmp/extract/..hidden/file.txt";
    }

    #[test]
    fn test_is_safe_path() {
        let dest = Path::new(fixtures::DEST);
        let safe = Path::new(fixtures::NESTED);
        assert!(is_safe_path(dest, safe));
    }

    #[test]
    fn test_sanitize_filename() {
        assert_eq!(sanitize_filename("..根目录"), "..根目录");
        assert_eq!(sanitize_filename(".."), "_");
        assert_eq!(sanitize_filename("."), "_");
        assert_eq!(sanitize_filename(""), "_");
        assert_eq!(sanitize_filename("normal.txt"), "normal.txt");
        assert_eq!(sanitize_filename("Budget/2024.pdf"), "Budget_2024.pdf");
        assert_eq!(sanitize_filename("path\\to\\file.pdf"), "path_to_file.pdf");
        assert_eq!(sanitize_filename("file\x00name.txt"), "filename.txt");

        assert_eq!(sanitize_filename("name/with/slashes.txt"), "name_with_slashes.txt");
        assert_eq!(sanitize_filename("name\\with\\backslashes.txt"), "name_with_backslashes.txt");
        assert_eq!(sanitize_filename("\x00nullbyte"), "nullbyte");
        assert_eq!(sanitize_filename(" file.txt "), "file.txt");
        assert_eq!(sanitize_filename("/leading slash"), "_leading slash");
        assert_eq!(sanitize_filename("trailing slash/"), "trailing slash_");
        assert_eq!(sanitize_filename("multiple///slashes"), "multiple___slashes");
    }

    // Historic name kept; the fixture is chosen per platform, so this runs everywhere.
    #[test]
    fn test_is_safe_path_unsafe_windows() {
        let dest = Path::new(fixtures::DEST);
        let unsafe_path = Path::new(fixtures::TRAVERSAL);
        assert!(!is_safe_path(dest, unsafe_path));
    }

    #[test]
    fn test_is_safe_path_absolute() {
        let dest = Path::new(fixtures::DEST);
        let absolute = Path::new(fixtures::OUTSIDE);
        assert!(!is_safe_path(dest, absolute));
    }

    #[test]
    fn test_is_safe_path_same_file() {
        let dest = Path::new(fixtures::DEST);
        let same = Path::new(fixtures::DEST);
        assert!(is_safe_path(dest, same));
    }

    // A component that merely starts with ".." is an ordinary name, not a parent reference.
    #[test]
    fn test_is_safe_path_parent_in_name() {
        let dest = Path::new(fixtures::DEST);
        let parent_in_name = Path::new(fixtures::DOTTED);
        assert!(is_safe_path(dest, parent_in_name));
    }

    #[test]
    fn test_is_archive() {
        assert!(is_archive(Path::new("file.zip")));
        assert!(is_archive(Path::new("file.tar")));
        assert!(is_archive(Path::new("file.tar.gz")));
        assert!(is_archive(Path::new("file.tgz")));
        assert!(is_archive(Path::new("file.tar.bz2")));
        assert!(is_archive(Path::new("file.tbz")));
        assert!(!is_archive(Path::new("file.txt")));
        assert!(!is_archive(Path::new("file.gz")));
        assert!(!is_archive(Path::new("file")));
    }
}