1use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::format::is_binary;
8use crate::executor::{Match, QueryOptions};
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Searches a directory tree for lines matching a pattern.
///
/// The scanner only stores the directory it starts walking from; every
/// search is configured per call.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Directory the walk starts from.
    root: PathBuf,
}
21
22impl Scanner {
23 pub fn new(root: &Path) -> Self {
24 Self {
25 root: root.to_owned(),
26 }
27 }
28
29 pub fn scan(
30 &self,
31 pattern: &str,
32 is_regex: bool,
33 ignore_case: bool,
34 options: &QueryOptions,
35 ) -> Result<Vec<Match>> {
36 let raw = if is_regex {
37 pattern.to_string()
38 } else {
39 regex::escape(pattern)
40 };
41 let regex_pat = if ignore_case { format!("(?i){raw}") } else { raw };
42 let regex = Regex::new(®ex_pat)?;
43
44 let walker = WalkBuilder::new(&self.root)
45 .hidden(false)
46 .git_ignore(true)
47 .require_git(false)
48 .add_custom_ignore_filename(".ixignore")
49 .filter_entry(move |entry| {
50 let path = entry.path();
51 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
52
53 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
55 && (name == "lost+found" || name == ".git" || name == "node_modules" ||
56 name == "target" || name == "__pycache__" || name == ".tox" ||
57 name == ".venv" || name == "venv" || name == ".ix")
58 {
59 return false;
60 }
61
62 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
64 if let Ok(metadata) = entry.metadata()
65 && metadata.len() > 10 * 1024 * 1024
66 {
67 return false;
68 }
69 if name == "Cargo.lock" || name == "package-lock.json" || name == "pnpm-lock.yaml" ||
70 name == "shard.ix" || name == "shard.ix.tmp"
71 {
72 return false;
73 }
74 }
75
76 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
78 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
79 match ext {
80 "so" | "o" | "dylib" | "a" | "dll" | "exe" | "pyc" |
82 "jpg" | "png" | "gif" | "mp4" | "mp3" | "pdf" |
84 "zip" | "7z" | "rar" |
86 "sqlite" | "db" | "bin" => return false,
88 _ => {}
89 }
90 if name.ends_with(".tar.gz") {
91 return false;
92 }
93 }
94 true
95 })
96 .build();
97
98 let paths: Vec<PathBuf> = walker
99 .filter_map(|result| {
100 match result {
101 Ok(entry) => Some(entry),
102 Err(e) => {
103 eprintln!("ix: warning: scanner skipping path: {}", e);
104 None
105 }
106 }
107 })
108 .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
109 .map(|entry| entry.path().to_owned())
110 .collect();
111
112 let matches_found = AtomicU32::new(0);
113 let mut matches: Vec<Match> = paths
114 .into_par_iter()
115 .filter_map(|path| {
116 if options.max_results > 0
117 && matches_found.load(Ordering::Relaxed) >= options.max_results as u32
118 {
119 return None;
120 }
121
122 if !options.type_filter.is_empty() {
124 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
125 if !options.type_filter.iter().any(|e: &String| e == ext) {
126 return None;
127 }
128 }
129
130 if options.archive {
132 let _ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
133 let _is_tar_gz = path.to_str().map(|s| s.ends_with(".tar.gz")).unwrap_or(false);
134
135 #[cfg(feature = "archive")]
136 {
137 if _ext == "zip"
138 && let Ok(archive_matches) = crate::archive::scan_zip(&path, ®ex, options)
139 {
140 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
141 return Some(archive_matches);
142 } else if _is_tar_gz
143 && let Ok(archive_matches) = crate::archive::scan_tar_gz(&path, ®ex, options)
144 {
145 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
146 return Some(archive_matches);
147 }
148 }
149 }
150
151 let file_matches = self.scan_file(&path, ®ex, options).ok()?;
152 matches_found.fetch_add(file_matches.len() as u32, Ordering::Relaxed);
153 Some(file_matches)
154 })
155 .flatten()
156 .collect();
157
158 if options.max_results > 0 && matches.len() > options.max_results {
159 matches.truncate(options.max_results);
160 }
161
162 Ok(matches)
163 }
164
165 fn scan_stream<R: Read>(
166 &self,
167 reader: R,
168 path: &Path,
169 regex: &Regex,
170 options: &QueryOptions,
171 ) -> Result<Vec<Match>> {
172 let mut buf_reader = BufReader::new(reader);
173 let mut matches = Vec::new();
174 let mut line_number = 0u32;
175 let mut byte_offset = 0u64;
176
177 {
179 let buffer = buf_reader.fill_buf()?;
180 if buffer.is_empty() {
181 return Ok(vec![]);
182 }
183 let is_bin = is_binary(buffer);
184 if is_bin && !options.binary {
185 return Ok(vec![]);
186 }
187 }
188
189 let mut line = String::new();
190 let mut context_before = std::collections::VecDeque::new();
191 let mut pending_matches: Vec<Match> = Vec::new();
192
193 while buf_reader.read_line(&mut line)? > 0 {
194 line_number += 1;
195 let line_len = line.len() as u64;
196 let trimmed_line = line.trim_end().to_string();
197
198 for m in &mut pending_matches {
200 if m.context_after.len() < options.context_lines {
201 m.context_after.push(trimmed_line.clone());
202 }
203 }
204
205 let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
207 .into_iter()
208 .partition(|m| m.context_after.len() >= options.context_lines);
209 matches.extend(completed);
210 pending_matches = still_pending;
211
212 if let Some(m) = regex.find(&line) {
213 let context_before_vec: Vec<String> =
214 context_before.iter().map(|s: &String| s.trim_end().to_string()).collect();
215
216 let new_match = Match {
217 file_path: path.to_owned(),
218 line_number,
219 col: (m.start() + 1) as u32,
220 line_content: if options.count_only {
221 String::new()
222 } else {
223 trimmed_line.clone()
224 },
225 byte_offset: byte_offset + m.start() as u64,
226 context_before: context_before_vec,
227 context_after: vec![],
228 is_binary: false,
229 };
230
231 if options.context_lines > 0 {
232 pending_matches.push(new_match);
233 } else {
234 matches.push(new_match);
235 }
236
237 if options.max_results > 0
238 && (matches.len() + pending_matches.len()) >= options.max_results
239 && (pending_matches.is_empty() || matches.len() >= options.max_results)
240 {
241 break;
242 }
243 }
244
245 if options.context_lines > 0 {
246 context_before.push_back(line.clone());
247 if context_before.len() > options.context_lines {
248 context_before.pop_front();
249 }
250 }
251
252 byte_offset += line_len;
253 line.clear();
254 }
255
256 matches.extend(pending_matches);
257 Ok(matches)
258 }
259
260 fn scan_file(
261 &self,
262 path: &Path,
263 regex: &Regex,
264 options: &QueryOptions,
265 ) -> Result<Vec<Match>> {
266 let file = File::open(path)?;
267 let metadata = file.metadata()?;
268 if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
269 return Ok(vec![]);
271 }
272
273 let mmap = unsafe { Mmap::map(&file)? };
274
275 if options.decompress
276 && let Some(reader) = maybe_decompress(path, &mmap)? {
277 return self.scan_stream(reader, path, regex, options);
278 }
279
280 self.scan_stream(Cursor::new(&mmap[..]), path, regex, options)
282 }
283}