1use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::format::is_binary;
8use crate::executor::{Match, QueryOptions};
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Searches the files below a root directory for lines matching a pattern.
///
/// Construct one with [`Scanner::new`] and run a search with
/// [`Scanner::scan`].
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Directory (or single file) at which the directory walk starts.
    root: PathBuf,
}
21
22impl Scanner {
23 pub fn new(root: &Path) -> Self {
24 Self {
25 root: root.to_owned(),
26 }
27 }
28
29 pub fn scan(
30 &self,
31 pattern: &str,
32 is_regex: bool,
33 ignore_case: bool,
34 options: &QueryOptions,
35 ) -> Result<Vec<Match>> {
36 let raw = if is_regex {
37 pattern.to_string()
38 } else {
39 regex::escape(pattern)
40 };
41 let regex_pat = if ignore_case { format!("(?i){raw}") } else { raw };
42 let regex = Regex::new(®ex_pat)?;
43
44 let walker = WalkBuilder::new(&self.root)
45 .hidden(false)
46 .git_ignore(true)
47 .build();
48
49 let paths: Vec<PathBuf> = walker
50 .filter_map(|result| result.ok())
51 .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
52 .map(|entry| entry.path().to_owned())
53 .collect();
54
55 let matches_found = AtomicU32::new(0);
56 let mut matches: Vec<Match> = paths
57 .into_par_iter()
58 .filter_map(|path| {
59 if options.max_results > 0
60 && matches_found.load(Ordering::Relaxed) >= options.max_results as u32
61 {
62 return None;
63 }
64
65 if !options.type_filter.is_empty() {
67 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
68 if !options.type_filter.iter().any(|e: &String| e == ext) {
69 return None;
70 }
71 }
72
73 if options.archive {
75 let _ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
76 let _is_tar_gz = path.to_str().map(|s| s.ends_with(".tar.gz")).unwrap_or(false);
77
78 #[cfg(feature = "archive")]
79 {
80 if _ext == "zip"
81 && let Ok(archive_matches) = crate::archive::scan_zip(&path, ®ex, options)
82 {
83 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
84 return Some(archive_matches);
85 } else if _is_tar_gz
86 && let Ok(archive_matches) = crate::archive::scan_tar_gz(&path, ®ex, options)
87 {
88 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
89 return Some(archive_matches);
90 }
91 }
92 }
93
94 let file_matches = self.scan_file(&path, ®ex, options).ok()?;
95 matches_found.fetch_add(file_matches.len() as u32, Ordering::Relaxed);
96 Some(file_matches)
97 })
98 .flatten()
99 .collect();
100
101 if options.max_results > 0 && matches.len() > options.max_results {
102 matches.truncate(options.max_results);
103 }
104
105 Ok(matches)
106 }
107
108 fn scan_stream<R: Read>(
109 &self,
110 reader: R,
111 path: &Path,
112 regex: &Regex,
113 options: &QueryOptions,
114 ) -> Result<Vec<Match>> {
115 let mut buf_reader = BufReader::new(reader);
116 let mut matches = Vec::new();
117 let mut line_number = 0u32;
118 let mut byte_offset = 0u64;
119
120 {
122 let buffer = buf_reader.fill_buf()?;
123 if buffer.is_empty() {
124 return Ok(vec![]);
125 }
126 let is_bin = is_binary(buffer);
127 if is_bin && !options.binary {
128 return Ok(vec![]);
129 }
130 }
131
132 let mut line = String::new();
133 let mut context_before = std::collections::VecDeque::new();
134 let mut pending_matches: Vec<Match> = Vec::new();
135
136 while buf_reader.read_line(&mut line)? > 0 {
137 line_number += 1;
138 let line_len = line.len() as u64;
139 let trimmed_line = line.trim_end().to_string();
140
141 for m in &mut pending_matches {
143 if m.context_after.len() < options.context_lines {
144 m.context_after.push(trimmed_line.clone());
145 }
146 }
147
148 let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
150 .into_iter()
151 .partition(|m| m.context_after.len() >= options.context_lines);
152 matches.extend(completed);
153 pending_matches = still_pending;
154
155 if let Some(m) = regex.find(&line) {
156 let context_before_vec: Vec<String> =
157 context_before.iter().map(|s: &String| s.trim_end().to_string()).collect();
158
159 let new_match = Match {
160 file_path: path.to_owned(),
161 line_number,
162 col: (m.start() + 1) as u32,
163 line_content: if options.count_only {
164 String::new()
165 } else {
166 trimmed_line.clone()
167 },
168 byte_offset: byte_offset + m.start() as u64,
169 context_before: context_before_vec,
170 context_after: vec![],
171 is_binary: false,
172 };
173
174 if options.context_lines > 0 {
175 pending_matches.push(new_match);
176 } else {
177 matches.push(new_match);
178 }
179
180 if options.max_results > 0
181 && (matches.len() + pending_matches.len()) >= options.max_results
182 && (pending_matches.is_empty() || matches.len() >= options.max_results)
183 {
184 break;
185 }
186 }
187
188 if options.context_lines > 0 {
189 context_before.push_back(line.clone());
190 if context_before.len() > options.context_lines {
191 context_before.pop_front();
192 }
193 }
194
195 byte_offset += line_len;
196 line.clear();
197 }
198
199 matches.extend(pending_matches);
200 Ok(matches)
201 }
202
203 fn scan_file(
204 &self,
205 path: &Path,
206 regex: &Regex,
207 options: &QueryOptions,
208 ) -> Result<Vec<Match>> {
209 let file = File::open(path)?;
210 let metadata = file.metadata()?;
211 if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
212 return Ok(vec![]);
214 }
215
216 let mmap = unsafe { Mmap::map(&file)? };
217
218 if options.decompress
219 && let Some(reader) = maybe_decompress(path, &mmap)? {
220 return self.scan_stream(reader, path, regex, options);
221 }
222
223 self.scan_stream(Cursor::new(&mmap[..]), path, regex, options)
225 }
226}