1use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::executor::{Match, QueryOptions};
8use crate::format::is_binary;
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Searches the files below a root directory for a pattern.
///
/// The scanner owns its root path, so it can outlive the path it was
/// created from and be cloned cheaply.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Directory at which every scan starts.
    root: PathBuf,
}
25
26impl Scanner {
27 #[must_use]
29 pub fn new(root: &Path) -> Self {
30 Self {
31 root: root.to_owned(),
32 }
33 }
34
35 #[allow(clippy::too_many_lines)]
42 pub fn scan(
43 &self,
44 pattern: &str,
45 is_regex: bool,
46 ignore_case: bool,
47 options: &QueryOptions,
48 ) -> Result<Vec<Match>> {
49 let raw = if is_regex {
50 pattern.to_string()
51 } else {
52 regex::escape(pattern)
53 };
54
55 let with_word_boundaries = if options.word_boundary && !is_regex {
57 format!("\\b{raw}\\b")
58 } else {
59 raw
60 };
61
62 let mut regex_pat = String::new();
64 if ignore_case {
65 regex_pat.push_str("(?i)");
66 }
67 if options.multiline {
68 regex_pat.push_str("(?s)");
69 }
70 regex_pat.push_str(&with_word_boundaries);
71
72 let regex = Regex::new(®ex_pat)?;
73
74 let walker = WalkBuilder::new(&self.root)
75 .hidden(false)
76 .git_ignore(true)
77 .require_git(false)
78 .add_custom_ignore_filename(".ixignore")
79 .filter_entry(move |entry| {
80 let path = entry.path();
81 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
82
83 if entry.file_type().is_some_and(|t| t.is_dir())
85 && (name == "lost+found"
86 || name == ".git"
87 || name == "node_modules"
88 || name == "target"
89 || name == "__pycache__"
90 || name == ".tox"
91 || name == ".venv"
92 || name == "venv"
93 || name == ".ix")
94 {
95 return false;
96 }
97
98 if entry.file_type().is_some_and(|t| t.is_file()) {
100 if let Ok(metadata) = entry.metadata()
101 && metadata.len() > 10 * 1024 * 1024
102 {
103 return false;
104 }
105 if name == "Cargo.lock"
106 || name == "package-lock.json"
107 || name == "pnpm-lock.yaml"
108 || name == "shard.ix"
109 || name == "shard.ix.tmp"
110 {
111 return false;
112 }
113 }
114
115 if entry.file_type().is_some_and(|t| t.is_file()) {
117 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
118 match ext {
119 "so" | "o" | "dylib" | "a" | "dll" | "exe" | "pyc" |
121 "jpg" | "png" | "gif" | "mp4" | "mp3" | "pdf" |
123 "zip" | "7z" | "rar" |
125 "sqlite" | "db" | "bin" => return false,
127 _ => {}
128 }
129 if name.ends_with(".tar.gz") {
130 return false;
131 }
132 }
133 true
134 })
135 .build();
136
137 let paths: Vec<PathBuf> = walker
138 .filter_map(|result| match result {
139 Ok(entry) => Some(entry),
140 Err(e) => {
141 eprintln!("ix: warning: scanner skipping path: {e}");
142 None
143 }
144 })
145 .filter(|entry| entry.file_type().is_some_and(|t| t.is_file()))
146 .map(|entry| entry.path().to_owned())
147 .collect();
148
149 let matches_found = AtomicU32::new(0);
150 let mut matches: Vec<Match> = paths
151 .into_par_iter()
152 .filter_map(|path| {
153 if options.max_results > 0
154 && matches_found.load(Ordering::Relaxed)
155 >= u32::try_from(options.max_results).unwrap_or(0)
156 {
157 return None;
158 }
159
160 if !options.type_filter.is_empty() {
162 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
163 if !options.type_filter.iter().any(|e: &String| e == ext) {
164 return None;
165 }
166 }
167
168 if options.archive {
170 #[cfg(feature = "archive")]
171 {
172 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
173 let is_tar_gz = path.to_str().is_some_and(|s| s.ends_with(".tar.gz"));
174 if ext == "zip"
175 && let Ok(archive_matches) =
176 crate::archive::scan_zip(&path, ®ex, options)
177 {
178 matches_found.fetch_add(
179 u32::try_from(archive_matches.len()).unwrap_or(0),
180 Ordering::Relaxed,
181 );
182 return Some(archive_matches);
183 }
184 if is_tar_gz
185 && let Ok(archive_matches) =
186 crate::archive::scan_tar_gz(&path, ®ex, options)
187 {
188 matches_found.fetch_add(
189 u32::try_from(archive_matches.len()).unwrap_or(0),
190 Ordering::Relaxed,
191 );
192 return Some(archive_matches);
193 }
194 }
195 }
196
197 let file_matches = Self::scan_file(&path, ®ex, options).ok()?;
198 matches_found.fetch_add(
199 u32::try_from(file_matches.len()).unwrap_or(0),
200 Ordering::Relaxed,
201 );
202 Some(file_matches)
203 })
204 .flatten()
205 .collect();
206
207 if options.max_results > 0 && matches.len() > options.max_results {
208 matches.truncate(options.max_results);
209 }
210
211 Ok(matches)
212 }
213
214 #[allow(clippy::too_many_lines)]
215 fn scan_stream<R: Read>(
216 reader: R,
217 path: &Path,
218 regex: &Regex,
219 options: &QueryOptions,
220 ) -> Result<Vec<Match>> {
221 let mut buf_reader = BufReader::new(reader);
222 let mut matches = Vec::new();
223 let mut line_number = 0u32;
224 let mut byte_offset = 0u64;
225
226 {
228 let buffer = buf_reader.fill_buf()?;
229 if buffer.is_empty() {
230 return Ok(vec![]);
231 }
232 let is_bin = is_binary(buffer);
233 if is_bin && !options.binary {
234 return Ok(vec![]);
235 }
236 }
237
238 let mut line = String::new();
239 let mut context_before = std::collections::VecDeque::new();
240 let mut pending_matches: Vec<Match> = Vec::new();
241
242 while buf_reader.read_line(&mut line)? > 0 {
243 line_number += 1;
244 let line_len = u64::try_from(line.len()).unwrap_or(0);
245 let trimmed_line = line.trim_end().to_string();
246
247 for m in &mut pending_matches {
249 if m.context_after.len() < options.context_lines {
250 m.context_after.push(trimmed_line.clone());
251 }
252 }
253
254 let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
256 .into_iter()
257 .partition(|m| m.context_after.len() >= options.context_lines);
258 matches.extend(completed);
259 pending_matches = still_pending;
260
261 if let Some(m) = regex.find(&line) {
262 let context_before_vec: Vec<String> = context_before
263 .iter()
264 .map(|s: &String| s.trim_end().to_string())
265 .collect();
266
267 let new_match = Match {
268 file_path: path.to_owned(),
269 line_number,
270 col: u32::try_from(m.start() + 1).unwrap_or(0),
271 line_content: if options.count_only {
272 String::new()
273 } else {
274 trimmed_line.clone()
275 },
276 byte_offset: byte_offset + u64::try_from(m.start()).unwrap_or(0),
277 context_before: context_before_vec,
278 context_after: vec![],
279 is_binary: false,
280 };
281
282 if options.context_lines > 0 {
283 pending_matches.push(new_match);
284 } else {
285 matches.push(new_match);
286 }
287
288 if options.max_results > 0
289 && (matches.len() + pending_matches.len()) >= options.max_results
290 && (pending_matches.is_empty() || matches.len() >= options.max_results)
291 {
292 break;
293 }
294 }
295
296 if options.context_lines > 0 {
297 context_before.push_back(line.clone());
298 if context_before.len() > options.context_lines {
299 context_before.pop_front();
300 }
301 }
302
303 byte_offset += line_len;
304 line.clear();
305 }
306
307 matches.extend(pending_matches);
308 Ok(matches)
309 }
310
311 fn scan_file(path: &Path, regex: &Regex, options: &QueryOptions) -> Result<Vec<Match>> {
312 let file = File::open(path)?;
313 let metadata = file.metadata()?;
314 if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
315 return Ok(vec![]);
317 }
318
319 let mmap = unsafe { Mmap::map(&file)? };
320
321 if options.decompress
322 && let Some(reader) = maybe_decompress(path, &mmap)?
323 {
324 return Self::scan_stream(reader, path, regex, options);
325 }
326
327 Self::scan_stream(Cursor::new(&mmap[..]), path, regex, options)
329 }
330}