deep_unpack/
unpack.rs

//! Unpacking archive files found in folders
2use std::{
3    path::{Path, PathBuf},
4    sync::mpsc,
5};
6
7use anyhow::{Context, Result};
8use derive_builder::Builder;
9use ignore::WalkBuilder;
10use lazy_static::lazy_static;
11use rayon::prelude::*;
12
13use crate::{
14    data::{NoWalkList, UnpackStatus},
15    formats::kinds::ArchiveKind,
16};
17
18/// Skip searching archive file from a list of directories
19const NO_WALK_YAML: &str = include_str!("./no_walk.yaml");
20
21lazy_static! {
22    pub static ref NO_WALK_LIST: NoWalkList = serde_yaml::from_str(NO_WALK_YAML).unwrap();
23}
24
25#[derive(Debug, Clone)]
26/// List of archive files that detected during the walk-on directories
27struct WalkResult {
28    pub archive_kind: ArchiveKind,
29    pub path_buf: PathBuf,
30}
31
32#[derive(Default, Builder, Debug)]
33#[builder(setter(into))]
34pub struct DeepWalk {
35    #[builder(default = "\".\".to_string()")]
36    pub folder: String,
37    #[builder(default = "\"deep_unpack\".to_string()")]
38    pub unpack_folder: String,
39    #[builder(default = "self.default_no_walk()")]
40    pub no_walk: NoWalkList,
41    #[builder(default = "1", field(type = "u32"))]
42    pub unpack_level: u32,
43    #[builder(default = "\"__${FILENAME}$__\".to_string()")]
44    pub extract_template: String,
45}
46
47impl DeepWalk {
48    #[must_use]
49    #[allow(clippy::new_ret_no_self)]
50    pub fn new() -> DeepWalkBuilder {
51        DeepWalkBuilder::default()
52    }
53}
54
55impl DeepWalkBuilder {
56    #[allow(clippy::unused_self)]
57    fn default_no_walk(&self) -> NoWalkList {
58        NO_WALK_LIST.clone()
59    }
60
61    pub fn extract(&self) -> Result<Vec<UnpackStatus>> {
62        let unpack_config = self.build()?;
63
64        let root_path = Path::new(&unpack_config.folder);
65        let unpack_folder = Path::new(&unpack_config.unpack_folder);
66
67        // first, find archive files from all the root path directories.
68        let walk_result = Self::find_comppress_files(&root_path, unpack_config.no_walk.clone());
69
70        if walk_result.is_empty() {
71            return Ok(vec![]);
72        }
73
74        let mut result: Vec<UnpackStatus> = vec![];
75        result.extend(Self::parallel_unpack(
76            walk_result,
77            Some(root_path),
78            unpack_folder,
79            &unpack_config.extract_template,
80        ));
81
82        // if a deep level is bigger than 1, search in the extracted folder if there are
83        // more archive files. If yes, extract them also
84        let mut unpacked_files: Vec<String> = vec![];
85        for _ in 2..=unpack_config.unpack_level {
86            let walk_result =
87                Self::find_comppress_files(&unpack_folder, unpack_config.no_walk.clone())
88                    .iter()
89                    .filter(|f| {
90                        // make sure that we are not unpacking the same file twice
91                        let path_str = f.path_buf.display().to_string();
92                        if unpacked_files.contains(&path_str) {
93                            false
94                        } else {
95                            unpacked_files.push(path_str);
96                            true
97                        }
98                    })
99                    .cloned()
100                    .collect::<Vec<_>>();
101
102            if walk_result.is_empty() {
103                break;
104            }
105            result.extend(Self::parallel_unpack(
106                walk_result,
107                None,
108                unpack_folder,
109                &unpack_config.extract_template,
110            ));
111        }
112        Ok(result)
113    }
114
115    /// Return list of archive files from a given folder
116    fn find_comppress_files<P: AsRef<Path>>(path: &P, no_walk: NoWalkList) -> Vec<WalkResult> {
117        let (tx, rx) = mpsc::channel();
118        WalkBuilder::new(path)
119            .filter_entry(move |entry| {
120                if let Some(ep) = entry.path().to_str() {
121                    if no_walk.ignores.iter().any(|item| item.is_match(ep)) {
122                        return false;
123                    }
124                }
125                true
126            })
127            .hidden(false)
128            .git_ignore(true)
129            .threads(num_cpus::get())
130            .build_parallel()
131            .run(move || {
132                let tx = tx.clone();
133                Box::new(move |result| {
134                    if let Ok(de) = result {
135                        let metadata = match de.metadata() {
136                            Ok(m) => m,
137                            Err(e) => {
138                                log::info!("could not get dir entry medatada. {}", e);
139                                return ignore::WalkState::Continue;
140                            }
141                        };
142
143                        if metadata.is_dir() {
144                            return ignore::WalkState::Continue;
145                        }
146
147                        // check if the file is comppreesed file
148                        let path_buf = de.path().to_path_buf();
149                        if let Some(archive_kind) = ArchiveKind::for_path(&path_buf) {
150                            if let Err(err) = tx.send(WalkResult {
151                                archive_kind,
152                                path_buf,
153                            }) {
154                                log::info!(
155                                    "could not send extract status struct to channel. {}",
156                                    err
157                                );
158                            }
159                        }
160                    }
161                    ignore::WalkState::Continue
162                })
163            });
164        rx.iter().into_iter().collect::<Vec<WalkResult>>()
165    }
166
167    /// unpack list of [`WalkResult`] in parallel
168    fn parallel_unpack(
169        walk_result: Vec<WalkResult>,
170        root_path: Option<&Path>,
171        unpack_root_folder: &Path,
172        extract_template: &str,
173    ) -> Vec<UnpackStatus> {
174        walk_result
175            .par_iter()
176            .map(|archive_path| {
177                let mut archive = archive_path.archive_kind.new(&archive_path.path_buf);
178
179                let file_unpack_path = match root_path {
180                    Some(p) => match archive_path.path_buf.strip_prefix(p) {
181                        Ok(a) => unpack_root_folder.join(a),
182                        Err(e) => {
183                            log::debug!(
184                                "could not strip: {} with prefix: {}. err: {}",
185                                archive_path.path_buf.display(),
186                                p.display(),
187                                e
188                            );
189                            unpack_root_folder.to_path_buf()
190                        }
191                    },
192                    None => archive_path.path_buf.clone(),
193                };
194
195                let unpack_folder = match split_file_by_name(file_unpack_path.as_path()) {
196                    Ok((file_name, folder)) => {
197                        folder.join(extract_template.replace("{FILENAME}", &file_name))
198                    }
199                    Err(e) => {
200                        log::debug!("ould not split file by name. err: {}", e);
201                        unpack_root_folder.to_path_buf()
202                    }
203                };
204
205                match archive.unpack(&unpack_folder) {
206                    Ok(()) => UnpackStatus {
207                        archive_file: archive_path.path_buf.clone(),
208                        extract_to: Some(unpack_folder),
209                        err: None,
210                    },
211                    Err(e) => UnpackStatus {
212                        archive_file: archive_path.path_buf.clone(),
213                        extract_to: None,
214                        err: Some(format!("{}", e)),
215                    },
216                }
217            })
218            .collect::<Vec<_>>()
219    }
220}
221
222/// Split path to file name and parant path
223fn split_file_by_name(path: &Path) -> Result<(String, PathBuf)> {
224    let file_name = path
225        .file_name()
226        .and_then(std::ffi::OsStr::to_str)
227        .map(std::string::ToString::to_string)
228        .context("could get file name")?;
229
230    let parent = path
231        .parent()
232        .map(std::path::Path::to_path_buf)
233        .context("could get parent file")?;
234
235    Ok((file_name, parent))
236}
237
#[cfg(test)]
mod test_formats_kinds {
    use insta::{assert_debug_snapshot, with_settings};

    use super::*;

    /// Snapshot the result of splitting a nested relative path into its
    /// file name and parent folder.
    #[test]
    fn can_split_file_by_name() {
        let path: PathBuf = ["foo", "bar", "baz.tar.gz"].iter().collect();

        with_settings!({filters => vec![
            (r"\\\\", "/"), // for windows
        ]}, {
            assert_debug_snapshot!(split_file_by_name(&path));
        });
    }
}