utils_box/
archives.rs

1//! # Archives utilities
2//! A toolbox of small utilities that extract files from archives.
3//! Useful for retrieving files from various types of archives like tar, tar.gz, zip.
4
5use anyhow::{Result, bail};
6use flate2::read::GzDecoder;
7use std::{
8    fs::File,
9    io::{self, Read},
10    path::PathBuf,
11};
12use tar::Archive;
13use zip::ZipArchive;
14
15use crate::{log_info, log_trace};
16
/// Leading three bytes of a gzip stream: `1F 8B 08`.
pub static GZ_SIGNATURE: [u8; 3] = *b"\x1F\x8B\x08";
/// Leading three bytes of a zip archive: `50 4B 03` (ASCII `PK` followed by `0x03`).
pub static ZIP_SIGNATURE: [u8; 3] = *b"PK\x03";
19
/// Archive flavours this module knows how to extract.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ArchiveType {
    /// Plain, uncompressed tar archive.
    Tar,
    /// Gzip-compressed tar archive (`tar.gz`).
    Gz,
    /// Zip archive.
    Zip,
}
26
27/// Entry Point:
28/// Extract the selected file from the provided archive into the selected destination directory
29/// It detects if it is a compressed archive using the magic numbers from: https://www.garykessler.net/library/file_sigs.html
30pub fn archive_extract_file(
31    archive: PathBuf,
32    filename: PathBuf,
33    destination: PathBuf,
34) -> Result<()> {
35    // Check archive type
36    let mut file = File::open(&archive)?;
37    let mut magic_number: Vec<u8> = vec![0x0; 3];
38
39    file.read_exact(&mut magic_number)?;
40
41    if magic_number == GZ_SIGNATURE {
42        log_info!("[archive_extract_file] Detected [tar.gz] archive");
43        extract_file(archive, ArchiveType::Gz, filename, destination)
44    } else if magic_number == ZIP_SIGNATURE {
45        log_info!("[archive_extract_all] Detected [zip] archive");
46        extract_file(archive, ArchiveType::Zip, filename, destination)
47    } else {
48        log_info!("[archive_extract_file] Detected [tar] archive");
49        extract_file(archive, ArchiveType::Tar, filename, destination)
50    }
51}
52
53/// Entry Point:
54/// Extract all contents from the provided archive into the selected destination directory
55/// The destination directory will be created if not already available
56/// It detects if it is a compressed archive using the magic numbers from: https://www.garykessler.net/library/file_sigs.html
57pub fn archive_extract_all(archive: PathBuf, destination: PathBuf) -> Result<()> {
58    // Check archive type
59    let mut file = File::open(&archive)?;
60    let mut magic_number: Vec<u8> = vec![0x0; 3];
61
62    file.read_exact(&mut magic_number)?;
63
64    if magic_number == GZ_SIGNATURE {
65        log_info!("[archive_extract_all] Detected [tar.gz] archive");
66        extract_all(archive, ArchiveType::Gz, destination)
67    } else if magic_number == ZIP_SIGNATURE {
68        log_info!("[archive_extract_all] Detected [zip] archive");
69        extract_all(archive, ArchiveType::Zip, destination)
70    } else {
71        log_info!("[archive_extract_all] Detected [tar.gz] archive");
72        extract_all(archive, ArchiveType::Tar, destination)
73    }
74}
75
76/// Extract the selected file from the provided archive into the selected destination directory
77fn extract_file(
78    archive: PathBuf,
79    archive_type: ArchiveType,
80    filename: PathBuf,
81    destination: PathBuf,
82) -> Result<()> {
83    match archive_type {
84        ArchiveType::Tar => {
85            let mut ar = Archive::new(File::open(archive)?);
86
87            // Check the contents for the requested file
88            for archived_file in ar.entries()? {
89                // Unwrap the file
90                let mut ar_file = archived_file?;
91
92                // Check if it is the file we need
93                if ar_file.path()? == filename {
94                    let _ = std::fs::create_dir(destination.clone());
95                    ar_file.unpack_in(&destination)?;
96
97                    return Ok(());
98                }
99            }
100
101            bail!("[extract_file][tar] Failed to find requested file!");
102        }
103        ArchiveType::Gz => {
104            let file = File::open(archive)?;
105            let decompressed = GzDecoder::new(file);
106            let mut ar = Archive::new(decompressed);
107
108            // Check the contents for the requested file
109            for archived_file in ar.entries()? {
110                // Unwrap the file
111                let mut ar_file = archived_file?;
112
113                // Check if it is the file we need
114                if ar_file.path()? == filename {
115                    let _ = std::fs::create_dir(destination.clone());
116                    ar_file.unpack_in(&destination)?;
117
118                    return Ok(());
119                }
120            }
121
122            bail!("[extract_file][gz] Failed to find requested file!");
123        }
124        ArchiveType::Zip => {
125            let file = File::open(archive)?;
126            let mut ar = ZipArchive::new(file)?;
127
128            for i in 0..ar.len() {
129                let mut in_file = ar.by_index(i)?;
130                let outpath = match in_file.enclosed_name() {
131                    Some(path) => path,
132                    None => {
133                        log_trace!("Entry {} has a suspicious path", in_file.name());
134                        continue;
135                    }
136                };
137
138                if outpath.file_name() == Some(filename.as_os_str()) {
139                    let _ = std::fs::create_dir(destination.clone());
140                    let mut outfile = File::create(destination.join(filename))?;
141                    io::copy(&mut in_file, &mut outfile)?;
142
143                    return Ok(());
144                }
145            }
146
147            bail!("[extract_file][zip] Failed to find requested file !");
148        }
149    };
150}
151
152/// Extract all contents from the provided archive into the selected destination directory
153/// The destination directory will be created if not already available
154fn extract_all(archive: PathBuf, archive_type: ArchiveType, destination: PathBuf) -> Result<()> {
155    match archive_type {
156        ArchiveType::Tar => {
157            let mut ar = Archive::new(File::open(archive)?);
158            let _ = std::fs::create_dir(destination.clone());
159            ar.unpack(destination)?;
160        }
161        ArchiveType::Gz => {
162            let file = File::open(archive)?;
163            let decompressed = GzDecoder::new(file);
164            let mut ar = Archive::new(decompressed);
165            let _ = std::fs::create_dir(destination.clone());
166            ar.unpack(destination)?;
167        }
168        ArchiveType::Zip => {
169            let file = File::open(archive)?;
170            let mut ar = ZipArchive::new(file)?;
171            let _ = std::fs::create_dir(destination.clone());
172            ar.extract(destination)?;
173        }
174    }
175
176    Ok(())
177}
178
#[cfg(test)]
mod tests {
    use crate::archives::*;
    use named_lock::*;

    /// Builds the path of a fixture in `test_data`, relative to the directory that holds the
    /// running test executable.
    fn fixture(name: &str) -> PathBuf {
        std::env::current_exe()
            .unwrap()
            .parent()
            .unwrap()
            .join(format!("../../../test_data/{}", name))
    }

    /// Runs `body` with a freshly wiped `utils-box` directory under the system temp dir.
    ///
    /// A named lock is held for the whole call to make sure no other test messes with the same
    /// files!
    fn with_clean_destination(body: impl FnOnce(PathBuf)) {
        let lock = NamedLock::create("archives_tests").unwrap();
        let _guard = lock.lock().unwrap();

        let destination = std::env::temp_dir().join("utils-box");
        let _ = std::fs::remove_dir_all(destination.clone());

        body(destination);
    }

    /// A single file can be pulled out of a plain tar archive.
    #[test]
    fn extract_tar_test() {
        with_clean_destination(|destination| {
            let wanted: PathBuf = "happy_cloud.jpg".into();
            extract_file(
                fixture("test_archives.tar"),
                ArchiveType::Tar,
                wanted,
                destination,
            )
            .unwrap();
        });
    }

    /// A plain tar archive can be unpacked completely.
    #[test]
    fn extract_tar_all_test() {
        with_clean_destination(|destination| {
            extract_all(fixture("test_archives.tar"), ArchiveType::Tar, destination).unwrap();
        });
    }

    /// A single file can be pulled out of a zip archive.
    #[test]
    fn extract_zip_test() {
        with_clean_destination(|destination| {
            let wanted: PathBuf = "lorem.txt".into();
            extract_file(
                fixture("test_archives.zip"),
                ArchiveType::Zip,
                wanted,
                destination,
            )
            .unwrap();
        });
    }

    /// A zip archive can be unpacked completely.
    #[test]
    fn extract_zip_all_test() {
        with_clean_destination(|destination| {
            extract_all(fixture("test_archives.zip"), ArchiveType::Zip, destination).unwrap();
        });
    }

    /// The public entry point detects and unpacks a zip archive.
    #[test]
    fn archive_extract_all_zip_test() {
        with_clean_destination(|destination| {
            archive_extract_all(fixture("test_archives.zip"), destination).unwrap();
        });
    }

    /// The public entry point detects and unpacks a plain tar archive.
    #[test]
    fn archive_extract_all_tar_test() {
        with_clean_destination(|destination| {
            archive_extract_all(fixture("test_archives.tar"), destination).unwrap();
        });
    }

    /// The public entry point detects and unpacks a gzip-compressed tar archive.
    #[test]
    fn archive_extract_all_targz_test() {
        with_clean_destination(|destination| {
            archive_extract_all(fixture("test_archives.tar.gz"), destination).unwrap();
        });
    }
}