// tgrep/utils/walker.rs

1use std::{
2    collections::BTreeMap,
3    env,
4    fs::{self, DirEntry},
5    io,
6    path::{Path, PathBuf},
7    rc::Rc,
8    sync::atomic::{AtomicBool, Ordering},
9    sync::Arc,
10};
11
12use crossbeam::sync::WaitGroup;
13use futures::executor::ThreadPool;
14use log::{debug, error, info, warn};
15
16use crate::utils::display::Display;
17use crate::utils::filters::Filters;
18use crate::utils::grep::Grep;
19use crate::utils::lines::Zero;
20use crate::utils::mapped::Mapped;
21use crate::utils::matcher::Matcher;
22use crate::utils::patterns::{Patterns, ToPatterns};
23use crate::utils::writer::BufferedWriter;
24
/// File name of the per-directory ignore file whose patterns the walker loads.
const GIT_IGNORE: &str = ".gitignore";
/// Directory name whose presence marks a directory as containing a git repository.
pub const GIT_DIR: &str = ".git";
27
28#[derive(Clone)]
29pub struct Walker {
30    tpool: Option<ThreadPool>,
31    ignore_patterns: Arc<Patterns>,
32    force_ignore_patterns: Arc<Patterns>,
33    file_filters: Arc<Filters>,
34    grep: Grep,
35    matcher: Matcher,
36    ignore_symlinks: bool,
37    display: Arc<dyn Display>,
38    print_file_separator: bool,
39    file_separator_printed: Rc<AtomicBool>,
40}
41
42pub struct WalkerBuilder(Walker);
43
44impl WalkerBuilder {
45    pub fn new(grep: Grep, matcher: Matcher, display: Arc<dyn Display>) -> Self {
46        WalkerBuilder {
47            0: Walker::new(grep, matcher, display),
48        }
49    }
50
51    pub fn thread_pool(mut self, tpool: ThreadPool) -> WalkerBuilder {
52        self.0.tpool = Some(tpool);
53        self
54    }
55
56    pub fn ignore_patterns(mut self, ignore_patterns: Patterns) -> WalkerBuilder {
57        self.0.ignore_patterns = Arc::new(ignore_patterns);
58        self
59    }
60
61    pub fn force_ignore_patterns(mut self, force_ignore_patterns: Patterns) -> WalkerBuilder {
62        self.0.force_ignore_patterns = Arc::new(force_ignore_patterns);
63        self
64    }
65
66    pub fn file_filters(mut self, file_filters: Filters) -> WalkerBuilder {
67        self.0.file_filters = Arc::new(file_filters);
68        self
69    }
70
71    pub fn ignore_symlinks(mut self, ignore_symlinks: bool) -> WalkerBuilder {
72        self.0.ignore_symlinks = ignore_symlinks;
73        self
74    }
75
76    pub fn print_file_separator(mut self, print_file_separator: bool) -> WalkerBuilder {
77        self.0.print_file_separator = print_file_separator;
78        self
79    }
80
81    pub fn build(self) -> Walker {
82        self.0
83    }
84}
85
86impl Walker {
87    pub fn new(grep: Grep, matcher: Matcher, display: Arc<dyn Display>) -> Self {
88        Walker {
89            tpool: None,
90            ignore_patterns: Default::default(),
91            force_ignore_patterns: Default::default(),
92            file_filters: Default::default(),
93            grep,
94            matcher,
95            ignore_symlinks: false,
96            display,
97            print_file_separator: false,
98            file_separator_printed: Default::default(),
99        }
100    }
101
102    fn is_ignore_file(&self, entry: &DirEntry) -> bool {
103        Some(GIT_IGNORE) == entry.file_name().to_str()
104    }
105
106    fn is_excluded(&self, path: &Path, is_dir: bool) -> bool {
107        let path = path.to_str().unwrap();
108        let skip = self.force_ignore_patterns.is_excluded(path, is_dir);
109        if skip {
110            info!("Skipping [forced] {:?}", path);
111            return true;
112        }
113        let skip = self.ignore_patterns.is_excluded(path, is_dir);
114        if skip {
115            info!("Skipping {:?}", path);
116        }
117        skip
118    }
119
120    fn process_gitignore(path: &Path) -> Option<Patterns> {
121        let ifile = {
122            let mut ifile = path.to_path_buf();
123            ifile.push(GIT_IGNORE);
124            ifile
125        };
126        match ifile.to_patterns() {
127            Ok(ignore_patterns) => Some(ignore_patterns),
128            Err(e) => {
129                match e.downcast_ref::<io::Error>() {
130                    Some(e) if e.kind() == io::ErrorKind::NotFound => {}
131                    _ => error!("Failed to process path '{}': {:?}", ifile.display(), e),
132                };
133                None
134            }
135        }
136    }
137
138    fn contains_git_dir(path: &Path) -> bool {
139        let mut path = path.to_path_buf();
140        path.push(GIT_DIR);
141        path.exists()
142    }
143
144    fn walk_dir(&self, path: &Path, parents: &[PathBuf]) {
145        let walker = {
146            let mut walker = self.clone();
147            if let Some(mut ignore_patterns) = Self::process_gitignore(path) {
148                ignore_patterns.extend(&walker.ignore_patterns);
149                walker.ignore_patterns = Arc::new(ignore_patterns);
150            }
151            walker
152        };
153
154        let mut to_dive = BTreeMap::new();
155        let mut to_grep = Vec::new();
156
157        let entries: Vec<_> = fs::read_dir(path)
158            .unwrap()
159            .filter_map(|entry| entry.ok())
160            .filter(|entry| !self.is_ignore_file(entry))
161            .filter_map(|entry| match entry.metadata() {
162                Ok(meta) => Some((entry.path(), meta)),
163                Err(e) => {
164                    error!("Failed to get path '{}' metadata: {}", path.display(), e);
165                    None
166                }
167            })
168            .filter(|(entry, meta)| !walker.is_excluded(entry, meta.is_dir()))
169            .collect();
170        for (path, meta) in entries {
171            let file_type = meta.file_type();
172            if file_type.is_file() {
173                if !self.file_filters.matches(path.to_str().unwrap()) {
174                    continue;
175                }
176                to_grep.push((path, meta.len() as usize));
177            } else {
178                to_dive.insert(path, meta);
179            }
180        }
181
182        let parents = {
183            let mut parents = parents.to_owned();
184            parents.push(path.to_path_buf());
185            parents
186        };
187        for (entry, meta) in to_dive {
188            walker.walk_with_parents(&entry, Some(meta), &parents);
189        }
190
191        self.grep_many(&to_grep);
192    }
193
194    fn grep(
195        grep: Grep,
196        entry: Arc<PathBuf>,
197        len: usize,
198        matcher: Matcher,
199        display: Arc<dyn Display>,
200    ) {
201        match Mapped::new(&entry, len) {
202            Ok(mapped) => {
203                if content_inspector::inspect(&*mapped).is_binary() {
204                    debug!("Skipping binary file '{}'", entry.display());
205                    return;
206                }
207                (grep)(Arc::new(mapped), matcher, display);
208            }
209            Err(e) => {
210                warn!("Failed to map file '{}': {}", entry.display(), e);
211                (grep)(entry, matcher, display);
212            }
213        }
214    }
215
216    fn grep_many(&self, entries: &[(PathBuf, usize)]) {
217        let writer = self.display.writer();
218        let mut writers = BTreeMap::new();
219        let wg = WaitGroup::new();
220        for (entry, len) in entries {
221            let entry = Arc::new(entry.clone());
222            let matcher = self.matcher.clone();
223            let writer = Arc::new(BufferedWriter::new());
224            let display = self.display.with_writer(writer.clone());
225            writers.insert(entry.clone(), writer);
226            let len = *len;
227            if len == 0 {
228                (self.grep)(Arc::new(Zero::new((*entry).clone())), matcher, display);
229                continue;
230            }
231            if entries.len() < 3 {
232                Walker::grep(self.grep.clone(), entry, len, matcher, display);
233                continue;
234            }
235            match &self.tpool {
236                Some(tpool) => {
237                    let grep = self.grep.clone();
238                    let wg = wg.clone();
239                    tpool.spawn_ok(async move {
240                        Walker::grep(grep, entry, len, matcher, display);
241                        drop(wg);
242                    });
243                }
244                None => Walker::grep(self.grep.clone(), entry, len, matcher, display),
245            }
246        }
247        wg.wait();
248        for (_, w) in writers {
249            if self.print_file_separator
250                && w.has_some()
251                && self.file_separator_printed.swap(true, Ordering::Relaxed)
252            {
253                self.display.file_separator();
254            }
255            w.flush(&writer);
256        }
257    }
258
259    fn canonicalize(&self, orig: &Path, resolved: &Path) -> anyhow::Result<PathBuf> {
260        let cwd = env::current_dir()?;
261        let parent = orig
262            .parent()
263            .ok_or_else(|| anyhow::Error::msg("no parent"))?;
264        env::set_current_dir(&parent)?;
265        let path = resolved
266            .canonicalize()
267            .map_err(|e| anyhow::Error::new(e).context(format!("cwd {}", parent.display())));
268        env::set_current_dir(&cwd)?;
269        path
270    }
271
272    fn process_symlink(&self, orig: &Path, resolved: &Path, parents: &[PathBuf]) {
273        let path = self.canonicalize(orig, resolved);
274        if let Err(e) = path {
275            error!("Failed to canonicalize '{}': {}", resolved.display(), e);
276            return;
277        }
278        let path = path.unwrap();
279        if let Some(level) = parents.iter().position(|parent| *parent == path) {
280            error!(
281                "Symlink '{}' -> '{}' (dereferenced to '{}') loop detected at level {}",
282                orig.display(),
283                resolved.display(),
284                path.display(),
285                level,
286            );
287            return;
288        }
289        if parents.iter().any(|parent| path.starts_with(parent)) {
290            info!(
291                "Skipping symlink '{}' -> '{}' (dereferenced to '{}')",
292                orig.display(),
293                resolved.display(),
294                path.display(),
295            );
296            return;
297        }
298        self.walk_with_parents(&path, None, &{
299            let mut parents = parents.to_owned();
300            parents.push(path.clone());
301            parents
302        });
303    }
304
305    fn walk_with_parents(&self, path: &Path, meta: Option<fs::Metadata>, parents: &[PathBuf]) {
306        let meta = meta.or_else(|| match fs::symlink_metadata(path) {
307            Ok(meta) => Some(meta),
308            Err(e) => {
309                error!("Failed to get path '{}' metadata: {}", path.display(), e);
310                None
311            }
312        });
313        let meta = match meta {
314            Some(meta) => meta,
315            _ => return,
316        };
317        let file_type = meta.file_type();
318        if file_type.is_dir() {
319            self.walk_dir(path, parents);
320        } else if file_type.is_file() {
321            Walker::grep(
322                self.grep.clone(),
323                Arc::new(path.to_path_buf()),
324                meta.len() as usize,
325                self.matcher.clone(),
326                self.display.clone(),
327            );
328        } else if file_type.is_symlink() {
329            if self.ignore_symlinks {
330                info!("Skipping symlink '{}'", path.display());
331                return;
332            }
333            match fs::read_link(path) {
334                Ok(resolved) => self.process_symlink(path, &resolved, parents),
335                Err(e) => error!("Failed to read link '{}': {}", path.display(), e),
336            }
337        } else {
338            warn!("Unhandled path '{}': {:?}", path.display(), file_type)
339        }
340    }
341
342    pub fn find_ignore_patterns_in_parents(path: &Path) -> Option<Patterns> {
343        if Self::contains_git_dir(path) {
344            return None;
345        }
346        let mut patterns = Vec::new();
347        let mut path = path.to_path_buf();
348        while path.pop() {
349            if let Some(ignore_patterns) = Self::process_gitignore(&path) {
350                debug!("Found .gitignore in {}", path.display());
351                patterns.push(ignore_patterns);
352            }
353            if Self::contains_git_dir(&path) {
354                break;
355            }
356        }
357        if patterns.is_empty() {
358            return None;
359        }
360        let mut ignore_patterns = Patterns::default();
361        for pattern in patterns {
362            ignore_patterns.extend(&pattern);
363        }
364        Some(ignore_patterns)
365    }
366
367    pub fn walk(&self, path: &Path) {
368        self.walk_with_parents(path, None, &[]);
369    }
370}