1use std::{
2 collections::BTreeMap,
3 env,
4 fs::{self, DirEntry},
5 io,
6 path::{Path, PathBuf},
7 rc::Rc,
8 sync::atomic::{AtomicBool, Ordering},
9 sync::Arc,
10};
11
12use crossbeam::sync::WaitGroup;
13use futures::executor::ThreadPool;
14use log::{debug, error, info, warn};
15
16use crate::utils::display::Display;
17use crate::utils::filters::Filters;
18use crate::utils::grep::Grep;
19use crate::utils::lines::Zero;
20use crate::utils::mapped::Mapped;
21use crate::utils::matcher::Matcher;
22use crate::utils::patterns::{Patterns, ToPatterns};
23use crate::utils::writer::BufferedWriter;
24
/// File name of the per-directory ignore file; `Walker::process_gitignore`
/// appends it to a directory path before loading patterns, and
/// `Walker::is_ignore_file` uses it to keep the file itself out of the search.
static GIT_IGNORE: &str = ".gitignore";
/// Directory name probed by `Walker::contains_git_dir`; finding it ends the
/// upward search in `Walker::find_ignore_patterns_in_parents`.
pub const GIT_DIR: &str = ".git";
27
28#[derive(Clone)]
29pub struct Walker {
30 tpool: Option<ThreadPool>,
31 ignore_patterns: Arc<Patterns>,
32 force_ignore_patterns: Arc<Patterns>,
33 file_filters: Arc<Filters>,
34 grep: Grep,
35 matcher: Matcher,
36 ignore_symlinks: bool,
37 display: Arc<dyn Display>,
38 print_file_separator: bool,
39 file_separator_printed: Rc<AtomicBool>,
40}
41
42pub struct WalkerBuilder(Walker);
43
44impl WalkerBuilder {
45 pub fn new(grep: Grep, matcher: Matcher, display: Arc<dyn Display>) -> Self {
46 WalkerBuilder {
47 0: Walker::new(grep, matcher, display),
48 }
49 }
50
51 pub fn thread_pool(mut self, tpool: ThreadPool) -> WalkerBuilder {
52 self.0.tpool = Some(tpool);
53 self
54 }
55
56 pub fn ignore_patterns(mut self, ignore_patterns: Patterns) -> WalkerBuilder {
57 self.0.ignore_patterns = Arc::new(ignore_patterns);
58 self
59 }
60
61 pub fn force_ignore_patterns(mut self, force_ignore_patterns: Patterns) -> WalkerBuilder {
62 self.0.force_ignore_patterns = Arc::new(force_ignore_patterns);
63 self
64 }
65
66 pub fn file_filters(mut self, file_filters: Filters) -> WalkerBuilder {
67 self.0.file_filters = Arc::new(file_filters);
68 self
69 }
70
71 pub fn ignore_symlinks(mut self, ignore_symlinks: bool) -> WalkerBuilder {
72 self.0.ignore_symlinks = ignore_symlinks;
73 self
74 }
75
76 pub fn print_file_separator(mut self, print_file_separator: bool) -> WalkerBuilder {
77 self.0.print_file_separator = print_file_separator;
78 self
79 }
80
81 pub fn build(self) -> Walker {
82 self.0
83 }
84}
85
86impl Walker {
87 pub fn new(grep: Grep, matcher: Matcher, display: Arc<dyn Display>) -> Self {
88 Walker {
89 tpool: None,
90 ignore_patterns: Default::default(),
91 force_ignore_patterns: Default::default(),
92 file_filters: Default::default(),
93 grep,
94 matcher,
95 ignore_symlinks: false,
96 display,
97 print_file_separator: false,
98 file_separator_printed: Default::default(),
99 }
100 }
101
102 fn is_ignore_file(&self, entry: &DirEntry) -> bool {
103 Some(GIT_IGNORE) == entry.file_name().to_str()
104 }
105
106 fn is_excluded(&self, path: &Path, is_dir: bool) -> bool {
107 let path = path.to_str().unwrap();
108 let skip = self.force_ignore_patterns.is_excluded(path, is_dir);
109 if skip {
110 info!("Skipping [forced] {:?}", path);
111 return true;
112 }
113 let skip = self.ignore_patterns.is_excluded(path, is_dir);
114 if skip {
115 info!("Skipping {:?}", path);
116 }
117 skip
118 }
119
120 fn process_gitignore(path: &Path) -> Option<Patterns> {
121 let ifile = {
122 let mut ifile = path.to_path_buf();
123 ifile.push(GIT_IGNORE);
124 ifile
125 };
126 match ifile.to_patterns() {
127 Ok(ignore_patterns) => Some(ignore_patterns),
128 Err(e) => {
129 match e.downcast_ref::<io::Error>() {
130 Some(e) if e.kind() == io::ErrorKind::NotFound => {}
131 _ => error!("Failed to process path '{}': {:?}", ifile.display(), e),
132 };
133 None
134 }
135 }
136 }
137
138 fn contains_git_dir(path: &Path) -> bool {
139 let mut path = path.to_path_buf();
140 path.push(GIT_DIR);
141 path.exists()
142 }
143
144 fn walk_dir(&self, path: &Path, parents: &[PathBuf]) {
145 let walker = {
146 let mut walker = self.clone();
147 if let Some(mut ignore_patterns) = Self::process_gitignore(path) {
148 ignore_patterns.extend(&walker.ignore_patterns);
149 walker.ignore_patterns = Arc::new(ignore_patterns);
150 }
151 walker
152 };
153
154 let mut to_dive = BTreeMap::new();
155 let mut to_grep = Vec::new();
156
157 let entries: Vec<_> = fs::read_dir(path)
158 .unwrap()
159 .filter_map(|entry| entry.ok())
160 .filter(|entry| !self.is_ignore_file(entry))
161 .filter_map(|entry| match entry.metadata() {
162 Ok(meta) => Some((entry.path(), meta)),
163 Err(e) => {
164 error!("Failed to get path '{}' metadata: {}", path.display(), e);
165 None
166 }
167 })
168 .filter(|(entry, meta)| !walker.is_excluded(entry, meta.is_dir()))
169 .collect();
170 for (path, meta) in entries {
171 let file_type = meta.file_type();
172 if file_type.is_file() {
173 if !self.file_filters.matches(path.to_str().unwrap()) {
174 continue;
175 }
176 to_grep.push((path, meta.len() as usize));
177 } else {
178 to_dive.insert(path, meta);
179 }
180 }
181
182 let parents = {
183 let mut parents = parents.to_owned();
184 parents.push(path.to_path_buf());
185 parents
186 };
187 for (entry, meta) in to_dive {
188 walker.walk_with_parents(&entry, Some(meta), &parents);
189 }
190
191 self.grep_many(&to_grep);
192 }
193
194 fn grep(
195 grep: Grep,
196 entry: Arc<PathBuf>,
197 len: usize,
198 matcher: Matcher,
199 display: Arc<dyn Display>,
200 ) {
201 match Mapped::new(&entry, len) {
202 Ok(mapped) => {
203 if content_inspector::inspect(&*mapped).is_binary() {
204 debug!("Skipping binary file '{}'", entry.display());
205 return;
206 }
207 (grep)(Arc::new(mapped), matcher, display);
208 }
209 Err(e) => {
210 warn!("Failed to map file '{}': {}", entry.display(), e);
211 (grep)(entry, matcher, display);
212 }
213 }
214 }
215
216 fn grep_many(&self, entries: &[(PathBuf, usize)]) {
217 let writer = self.display.writer();
218 let mut writers = BTreeMap::new();
219 let wg = WaitGroup::new();
220 for (entry, len) in entries {
221 let entry = Arc::new(entry.clone());
222 let matcher = self.matcher.clone();
223 let writer = Arc::new(BufferedWriter::new());
224 let display = self.display.with_writer(writer.clone());
225 writers.insert(entry.clone(), writer);
226 let len = *len;
227 if len == 0 {
228 (self.grep)(Arc::new(Zero::new((*entry).clone())), matcher, display);
229 continue;
230 }
231 if entries.len() < 3 {
232 Walker::grep(self.grep.clone(), entry, len, matcher, display);
233 continue;
234 }
235 match &self.tpool {
236 Some(tpool) => {
237 let grep = self.grep.clone();
238 let wg = wg.clone();
239 tpool.spawn_ok(async move {
240 Walker::grep(grep, entry, len, matcher, display);
241 drop(wg);
242 });
243 }
244 None => Walker::grep(self.grep.clone(), entry, len, matcher, display),
245 }
246 }
247 wg.wait();
248 for (_, w) in writers {
249 if self.print_file_separator
250 && w.has_some()
251 && self.file_separator_printed.swap(true, Ordering::Relaxed)
252 {
253 self.display.file_separator();
254 }
255 w.flush(&writer);
256 }
257 }
258
259 fn canonicalize(&self, orig: &Path, resolved: &Path) -> anyhow::Result<PathBuf> {
260 let cwd = env::current_dir()?;
261 let parent = orig
262 .parent()
263 .ok_or_else(|| anyhow::Error::msg("no parent"))?;
264 env::set_current_dir(&parent)?;
265 let path = resolved
266 .canonicalize()
267 .map_err(|e| anyhow::Error::new(e).context(format!("cwd {}", parent.display())));
268 env::set_current_dir(&cwd)?;
269 path
270 }
271
272 fn process_symlink(&self, orig: &Path, resolved: &Path, parents: &[PathBuf]) {
273 let path = self.canonicalize(orig, resolved);
274 if let Err(e) = path {
275 error!("Failed to canonicalize '{}': {}", resolved.display(), e);
276 return;
277 }
278 let path = path.unwrap();
279 if let Some(level) = parents.iter().position(|parent| *parent == path) {
280 error!(
281 "Symlink '{}' -> '{}' (dereferenced to '{}') loop detected at level {}",
282 orig.display(),
283 resolved.display(),
284 path.display(),
285 level,
286 );
287 return;
288 }
289 if parents.iter().any(|parent| path.starts_with(parent)) {
290 info!(
291 "Skipping symlink '{}' -> '{}' (dereferenced to '{}')",
292 orig.display(),
293 resolved.display(),
294 path.display(),
295 );
296 return;
297 }
298 self.walk_with_parents(&path, None, &{
299 let mut parents = parents.to_owned();
300 parents.push(path.clone());
301 parents
302 });
303 }
304
305 fn walk_with_parents(&self, path: &Path, meta: Option<fs::Metadata>, parents: &[PathBuf]) {
306 let meta = meta.or_else(|| match fs::symlink_metadata(path) {
307 Ok(meta) => Some(meta),
308 Err(e) => {
309 error!("Failed to get path '{}' metadata: {}", path.display(), e);
310 None
311 }
312 });
313 let meta = match meta {
314 Some(meta) => meta,
315 _ => return,
316 };
317 let file_type = meta.file_type();
318 if file_type.is_dir() {
319 self.walk_dir(path, parents);
320 } else if file_type.is_file() {
321 Walker::grep(
322 self.grep.clone(),
323 Arc::new(path.to_path_buf()),
324 meta.len() as usize,
325 self.matcher.clone(),
326 self.display.clone(),
327 );
328 } else if file_type.is_symlink() {
329 if self.ignore_symlinks {
330 info!("Skipping symlink '{}'", path.display());
331 return;
332 }
333 match fs::read_link(path) {
334 Ok(resolved) => self.process_symlink(path, &resolved, parents),
335 Err(e) => error!("Failed to read link '{}': {}", path.display(), e),
336 }
337 } else {
338 warn!("Unhandled path '{}': {:?}", path.display(), file_type)
339 }
340 }
341
342 pub fn find_ignore_patterns_in_parents(path: &Path) -> Option<Patterns> {
343 if Self::contains_git_dir(path) {
344 return None;
345 }
346 let mut patterns = Vec::new();
347 let mut path = path.to_path_buf();
348 while path.pop() {
349 if let Some(ignore_patterns) = Self::process_gitignore(&path) {
350 debug!("Found .gitignore in {}", path.display());
351 patterns.push(ignore_patterns);
352 }
353 if Self::contains_git_dir(&path) {
354 break;
355 }
356 }
357 if patterns.is_empty() {
358 return None;
359 }
360 let mut ignore_patterns = Patterns::default();
361 for pattern in patterns {
362 ignore_patterns.extend(&pattern);
363 }
364 Some(ignore_patterns)
365 }
366
367 pub fn walk(&self, path: &Path) {
368 self.walk_with_parents(path, None, &[]);
369 }
370}