pars_core/util/tree/
convert.rs

1use std::collections::{HashSet, VecDeque};
2use std::fs::{self, canonicalize, DirEntry};
3use std::path::{Path, PathBuf};
4use std::{io, mem};
5
6use anyhow::Result;
7use bumpalo::collections::Vec as BumpVec;
8use bumpalo::Bump;
9use log::debug;
10use regex::Regex;
11
12use super::{DirTree, FilterType, TreeConfig, TreeNode};
13use crate::util::fs_util::{filename_to_str, path_to_str};
14
15impl<'a> DirTree<'a> {
16    pub fn new(config: &TreeConfig<'a>, bump: &'a Bump) -> Result<Self> {
17        let mut tree = DirTree::build_tree(config, bump)?;
18        Self::apply_whitelist(&config, &mut tree);
19        Self::shrink_tree(&mut tree);
20        Ok(tree)
21    }
22
23    fn apply_whitelist(config: &&TreeConfig, tree: &mut DirTree) {
24        if config.filter_type != FilterType::Include {
25            return;
26        }
27        let mut stack: VecDeque<(usize, usize)> = VecDeque::<(usize, usize)>::new();
28        stack.push_back((tree.root, 0));
29
30        while let Some((node_idx, vec_idx)) = stack.pop_back() {
31            let child_idx = {
32                let parent = &tree.map[node_idx];
33                if vec_idx >= parent.children.len() {
34                    continue;
35                }
36                parent.children[vec_idx]
37            };
38            let child_node = &mut tree.map[child_idx];
39            if !Self::filter_match(&config.filters, &child_node.name) {
40                child_node.visible = false;
41                stack.push_back((node_idx, vec_idx + 1));
42                if !child_node.children.is_empty() {
43                    stack.push_back((child_idx, 0));
44                }
45            } else {
46                let mut parent_idx = node_idx;
47                loop {
48                    let parent = &mut tree.map[parent_idx];
49                    if parent.visible {
50                        break;
51                    }
52                    parent.visible = true;
53                    if parent.parent.is_none() {
54                        break;
55                    }
56                    parent_idx = parent.parent.unwrap();
57                }
58                stack.push_back((node_idx, vec_idx + 1));
59            }
60        }
61    }
62
63    fn shrink_tree(tree: &mut DirTree) {
64        let mut queue = VecDeque::<usize>::new();
65        queue.push_back(tree.root);
66
67        while let Some(node_idx) = queue.pop_front() {
68            let mut children = mem::take(&mut tree.map[node_idx].children);
69            children.retain(|child_idx| {
70                if tree.map[*child_idx].visible {
71                    queue.push_back(*child_idx);
72                    true
73                } else {
74                    false
75                }
76            });
77            tree.map[node_idx].children = children;
78        }
79    }
80
81    fn build_tree<'b>(config: &'b TreeConfig<'a>, bump: &'a Bump) -> Result<Self> {
82        let root = config.root.join(config.target);
83
84        let mut tree = DirTree { map: BumpVec::new_in(bump), root: 0 };
85        let mut path_set = HashSet::<PathBuf>::new();
86
87        tree.map.push(TreeNode {
88            name: config.target.to_string(),
89            parent: None,
90            children: Vec::with_capacity(Self::count_sub_entry(&root)),
91            node_type: root.as_path().into(),
92            symlink_target: None, // No need to store root's symlink target
93            is_recursive: false,
94            visible: true,
95        });
96
97        path_set.insert(canonicalize(&root)?);
98
99        let mut stack = VecDeque::<(usize, Box<dyn Iterator<Item = io::Result<DirEntry>>>)>::new();
100        stack.push_back((0, Box::new(Self::read_dir_sorted(&root)?)));
101
102        while let Some((parent_idx, mut entry_iter)) = stack.pop_back() {
103            if let Some(entry) = entry_iter.next() {
104                let entry = entry?;
105                let entry_type = entry.file_type()?;
106                let entry_name = filename_to_str(&entry.path())?.to_string();
107
108                let is_git_dir = &entry_name == ".git" && entry_type.is_dir();
109                let is_dot_gpg_id = &entry_name == ".gpg-id" && entry_type.is_file();
110                let match_blacklist = config.filter_type == FilterType::Exclude
111                    && Self::filter_match(&config.filters, filename_to_str(&entry.path())?);
112                if is_git_dir || is_dot_gpg_id || match_blacklist {
113                    stack.push_back((parent_idx, entry_iter));
114                    continue;
115                }
116
117                let mut real_path = entry.path();
118                while real_path.is_symlink() {
119                    real_path = real_path.read_link()?;
120                }
121                let canonical_path = canonicalize(&real_path)?;
122                let is_recursive_link =
123                    path_set.contains(&canonical_path) && entry_type.is_symlink();
124
125                tree.map.push(TreeNode {
126                    name: entry_name,
127                    parent: Some(parent_idx),
128                    children: Vec::with_capacity(if is_recursive_link {
129                        0
130                    } else {
131                        if canonical_path.is_dir() {
132                            path_set.insert(canonical_path);
133                        }
134                        Self::count_sub_entry(&entry.path())
135                    }),
136                    node_type: entry.path().into(),
137                    symlink_target: if entry_type.is_symlink() {
138                        Some(path_to_str(&real_path)?.to_string())
139                    } else {
140                        None
141                    },
142                    is_recursive: is_recursive_link,
143                    visible: true,
144                });
145                let child_idx = tree.map.len() - 1;
146                debug!("Create tree node, Index {}: {:?}", child_idx, tree.map[child_idx]);
147                tree.map[parent_idx].children.push(child_idx);
148
149                stack.push_back((parent_idx, entry_iter));
150                let need_iterate_child =
151                    entry_type.is_dir() || (entry_type.is_symlink() && real_path.is_dir());
152                if !is_recursive_link && need_iterate_child {
153                    stack.push_back((child_idx, Box::new(Self::read_dir_sorted(entry.path())?)));
154                }
155            }
156        }
157        Ok(tree)
158    }
159
160    fn read_dir_sorted<P: AsRef<Path>>(
161        path: P,
162    ) -> io::Result<impl Iterator<Item = io::Result<DirEntry>>> {
163        let mut entries: Vec<DirEntry> =
164            fs::read_dir(path)?.collect::<Result<Vec<_>, io::Error>>()?;
165        entries.sort_by_key(|e| e.file_name().to_string_lossy().into_owned());
166        Ok(entries.into_iter().map(Ok).fuse())
167    }
168
169    fn count_sub_entry(path: &Path) -> usize {
170        if let Ok(dir) = path.read_dir() {
171            dir.count()
172        } else {
173            0
174        }
175    }
176
177    fn filter_match(filters: &Vec<Regex>, target: &str) -> bool {
178        for filter in filters {
179            if filter.is_match(target) {
180                return true;
181            }
182        }
183        false
184    }
185}