1use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::executor::{Match, QueryOptions};
8use crate::format::is_binary;
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Walks a directory tree and searches the files in it for a pattern.
///
/// Construct it with [`Scanner::new`] and run a search with [`Scanner::scan`].
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Directory the file walk starts from.
    root: PathBuf,
}
21
22impl Scanner {
23 pub fn new(root: &Path) -> Self {
24 Self {
25 root: root.to_owned(),
26 }
27 }
28
29 pub fn scan(
30 &self,
31 pattern: &str,
32 is_regex: bool,
33 ignore_case: bool,
34 options: &QueryOptions,
35 ) -> Result<Vec<Match>> {
36 let raw = if is_regex {
37 pattern.to_string()
38 } else {
39 regex::escape(pattern)
40 };
41 let regex_pat = if ignore_case {
42 format!("(?i){raw}")
43 } else {
44 raw
45 };
46 let regex = Regex::new(®ex_pat)?;
47
48 let walker = WalkBuilder::new(&self.root)
49 .hidden(false)
50 .git_ignore(true)
51 .require_git(false)
52 .add_custom_ignore_filename(".ixignore")
53 .filter_entry(move |entry| {
54 let path = entry.path();
55 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
56
57 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
59 && (name == "lost+found"
60 || name == ".git"
61 || name == "node_modules"
62 || name == "target"
63 || name == "__pycache__"
64 || name == ".tox"
65 || name == ".venv"
66 || name == "venv"
67 || name == ".ix")
68 {
69 return false;
70 }
71
72 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
74 if let Ok(metadata) = entry.metadata()
75 && metadata.len() > 10 * 1024 * 1024
76 {
77 return false;
78 }
79 if name == "Cargo.lock"
80 || name == "package-lock.json"
81 || name == "pnpm-lock.yaml"
82 || name == "shard.ix"
83 || name == "shard.ix.tmp"
84 {
85 return false;
86 }
87 }
88
89 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
91 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
92 match ext {
93 "so" | "o" | "dylib" | "a" | "dll" | "exe" | "pyc" |
95 "jpg" | "png" | "gif" | "mp4" | "mp3" | "pdf" |
97 "zip" | "7z" | "rar" |
99 "sqlite" | "db" | "bin" => return false,
101 _ => {}
102 }
103 if name.ends_with(".tar.gz") {
104 return false;
105 }
106 }
107 true
108 })
109 .build();
110
111 let paths: Vec<PathBuf> = walker
112 .filter_map(|result| match result {
113 Ok(entry) => Some(entry),
114 Err(e) => {
115 eprintln!("ix: warning: scanner skipping path: {}", e);
116 None
117 }
118 })
119 .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
120 .map(|entry| entry.path().to_owned())
121 .collect();
122
123 let matches_found = AtomicU32::new(0);
124 let mut matches: Vec<Match> = paths
125 .into_par_iter()
126 .filter_map(|path| {
127 if options.max_results > 0
128 && matches_found.load(Ordering::Relaxed) >= options.max_results as u32
129 {
130 return None;
131 }
132
133 if !options.type_filter.is_empty() {
135 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
136 if !options.type_filter.iter().any(|e: &String| e == ext) {
137 return None;
138 }
139 }
140
141 if options.archive {
143 let _ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
144 let _is_tar_gz = path
145 .to_str()
146 .map(|s| s.ends_with(".tar.gz"))
147 .unwrap_or(false);
148
149 #[cfg(feature = "archive")]
150 {
151 if _ext == "zip"
152 && let Ok(archive_matches) =
153 crate::archive::scan_zip(&path, ®ex, options)
154 {
155 matches_found
156 .fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
157 return Some(archive_matches);
158 } else if _is_tar_gz
159 && let Ok(archive_matches) =
160 crate::archive::scan_tar_gz(&path, ®ex, options)
161 {
162 matches_found
163 .fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
164 return Some(archive_matches);
165 }
166 }
167 }
168
169 let file_matches = self.scan_file(&path, ®ex, options).ok()?;
170 matches_found.fetch_add(file_matches.len() as u32, Ordering::Relaxed);
171 Some(file_matches)
172 })
173 .flatten()
174 .collect();
175
176 if options.max_results > 0 && matches.len() > options.max_results {
177 matches.truncate(options.max_results);
178 }
179
180 Ok(matches)
181 }
182
183 fn scan_stream<R: Read>(
184 &self,
185 reader: R,
186 path: &Path,
187 regex: &Regex,
188 options: &QueryOptions,
189 ) -> Result<Vec<Match>> {
190 let mut buf_reader = BufReader::new(reader);
191 let mut matches = Vec::new();
192 let mut line_number = 0u32;
193 let mut byte_offset = 0u64;
194
195 {
197 let buffer = buf_reader.fill_buf()?;
198 if buffer.is_empty() {
199 return Ok(vec![]);
200 }
201 let is_bin = is_binary(buffer);
202 if is_bin && !options.binary {
203 return Ok(vec![]);
204 }
205 }
206
207 let mut line = String::new();
208 let mut context_before = std::collections::VecDeque::new();
209 let mut pending_matches: Vec<Match> = Vec::new();
210
211 while buf_reader.read_line(&mut line)? > 0 {
212 line_number += 1;
213 let line_len = line.len() as u64;
214 let trimmed_line = line.trim_end().to_string();
215
216 for m in &mut pending_matches {
218 if m.context_after.len() < options.context_lines {
219 m.context_after.push(trimmed_line.clone());
220 }
221 }
222
223 let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
225 .into_iter()
226 .partition(|m| m.context_after.len() >= options.context_lines);
227 matches.extend(completed);
228 pending_matches = still_pending;
229
230 if let Some(m) = regex.find(&line) {
231 let context_before_vec: Vec<String> = context_before
232 .iter()
233 .map(|s: &String| s.trim_end().to_string())
234 .collect();
235
236 let new_match = Match {
237 file_path: path.to_owned(),
238 line_number,
239 col: (m.start() + 1) as u32,
240 line_content: if options.count_only {
241 String::new()
242 } else {
243 trimmed_line.clone()
244 },
245 byte_offset: byte_offset + m.start() as u64,
246 context_before: context_before_vec,
247 context_after: vec![],
248 is_binary: false,
249 };
250
251 if options.context_lines > 0 {
252 pending_matches.push(new_match);
253 } else {
254 matches.push(new_match);
255 }
256
257 if options.max_results > 0
258 && (matches.len() + pending_matches.len()) >= options.max_results
259 && (pending_matches.is_empty() || matches.len() >= options.max_results)
260 {
261 break;
262 }
263 }
264
265 if options.context_lines > 0 {
266 context_before.push_back(line.clone());
267 if context_before.len() > options.context_lines {
268 context_before.pop_front();
269 }
270 }
271
272 byte_offset += line_len;
273 line.clear();
274 }
275
276 matches.extend(pending_matches);
277 Ok(matches)
278 }
279
280 fn scan_file(&self, path: &Path, regex: &Regex, options: &QueryOptions) -> Result<Vec<Match>> {
281 let file = File::open(path)?;
282 let metadata = file.metadata()?;
283 if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
284 return Ok(vec![]);
286 }
287
288 let mmap = unsafe { Mmap::map(&file)? };
289
290 if options.decompress
291 && let Some(reader) = maybe_decompress(path, &mmap)?
292 {
293 return self.scan_stream(reader, path, regex, options);
294 }
295
296 self.scan_stream(Cursor::new(&mmap[..]), path, regex, options)
298 }
299}