1use crate::decompress::maybe_decompress;
6use crate::error::Result;
7use crate::format::is_binary;
8use crate::executor::{Match, QueryOptions};
9use ignore::WalkBuilder;
10use memmap2::Mmap;
11use rayon::prelude::*;
12use regex::Regex;
13use std::fs::File;
14use std::io::{BufRead, BufReader, Cursor, Read};
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU32, Ordering};
17
/// Searches the files beneath a root path for lines matching a pattern.
///
/// The scanner owns a copy of the root path it was created with; every call
/// to `scan` walks the tree starting from that path.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Path at which every walk starts.
    root: PathBuf,
}
21
22impl Scanner {
23 pub fn new(root: &Path) -> Self {
24 Self {
25 root: root.to_owned(),
26 }
27 }
28
29 pub fn scan(
30 &self,
31 pattern: &str,
32 is_regex: bool,
33 ignore_case: bool,
34 options: &QueryOptions,
35 ) -> Result<Vec<Match>> {
36 let raw = if is_regex {
37 pattern.to_string()
38 } else {
39 regex::escape(pattern)
40 };
41 let regex_pat = if ignore_case { format!("(?i){raw}") } else { raw };
42 let regex = Regex::new(®ex_pat)?;
43
44 let walker = WalkBuilder::new(&self.root)
45 .hidden(false)
46 .git_ignore(true)
47 .require_git(false)
48 .add_custom_ignore_filename(".ixignore")
49 .filter_entry(move |entry| {
50 let path = entry.path();
51 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
52
53 if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
55 && (name == "lost+found" || name == ".git" || name == "node_modules" ||
56 name == "target" || name == "__pycache__" || name == ".tox" ||
57 name == ".venv" || name == "venv" || name == ".ix")
58 {
59 return false;
60 }
61
62 if entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
64 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
65 match ext {
66 "so" | "o" | "dylib" | "a" | "dll" | "exe" | "pyc" |
68 "jpg" | "png" | "gif" | "mp4" | "mp3" | "pdf" |
70 "zip" | "7z" | "rar" |
72 "sqlite" | "db" | "bin" => return false,
74 _ => {}
75 }
76 if name.ends_with(".tar.gz") {
77 return false;
78 }
79 }
80 true
81 })
82 .build();
83
84 let paths: Vec<PathBuf> = walker
85 .filter_map(|result| {
86 match result {
87 Ok(entry) => Some(entry),
88 Err(e) => {
89 eprintln!("ix: warning: scanner skipping path: {}", e);
90 None
91 }
92 }
93 })
94 .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
95 .map(|entry| entry.path().to_owned())
96 .collect();
97
98 let matches_found = AtomicU32::new(0);
99 let mut matches: Vec<Match> = paths
100 .into_par_iter()
101 .filter_map(|path| {
102 if options.max_results > 0
103 && matches_found.load(Ordering::Relaxed) >= options.max_results as u32
104 {
105 return None;
106 }
107
108 if !options.type_filter.is_empty() {
110 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
111 if !options.type_filter.iter().any(|e: &String| e == ext) {
112 return None;
113 }
114 }
115
116 if options.archive {
118 let _ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
119 let _is_tar_gz = path.to_str().map(|s| s.ends_with(".tar.gz")).unwrap_or(false);
120
121 #[cfg(feature = "archive")]
122 {
123 if _ext == "zip"
124 && let Ok(archive_matches) = crate::archive::scan_zip(&path, ®ex, options)
125 {
126 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
127 return Some(archive_matches);
128 } else if _is_tar_gz
129 && let Ok(archive_matches) = crate::archive::scan_tar_gz(&path, ®ex, options)
130 {
131 matches_found.fetch_add(archive_matches.len() as u32, Ordering::Relaxed);
132 return Some(archive_matches);
133 }
134 }
135 }
136
137 let file_matches = self.scan_file(&path, ®ex, options).ok()?;
138 matches_found.fetch_add(file_matches.len() as u32, Ordering::Relaxed);
139 Some(file_matches)
140 })
141 .flatten()
142 .collect();
143
144 if options.max_results > 0 && matches.len() > options.max_results {
145 matches.truncate(options.max_results);
146 }
147
148 Ok(matches)
149 }
150
151 fn scan_stream<R: Read>(
152 &self,
153 reader: R,
154 path: &Path,
155 regex: &Regex,
156 options: &QueryOptions,
157 ) -> Result<Vec<Match>> {
158 let mut buf_reader = BufReader::new(reader);
159 let mut matches = Vec::new();
160 let mut line_number = 0u32;
161 let mut byte_offset = 0u64;
162
163 {
165 let buffer = buf_reader.fill_buf()?;
166 if buffer.is_empty() {
167 return Ok(vec![]);
168 }
169 let is_bin = is_binary(buffer);
170 if is_bin && !options.binary {
171 return Ok(vec![]);
172 }
173 }
174
175 let mut line = String::new();
176 let mut context_before = std::collections::VecDeque::new();
177 let mut pending_matches: Vec<Match> = Vec::new();
178
179 while buf_reader.read_line(&mut line)? > 0 {
180 line_number += 1;
181 let line_len = line.len() as u64;
182 let trimmed_line = line.trim_end().to_string();
183
184 for m in &mut pending_matches {
186 if m.context_after.len() < options.context_lines {
187 m.context_after.push(trimmed_line.clone());
188 }
189 }
190
191 let (completed, still_pending): (Vec<_>, Vec<_>) = pending_matches
193 .into_iter()
194 .partition(|m| m.context_after.len() >= options.context_lines);
195 matches.extend(completed);
196 pending_matches = still_pending;
197
198 if let Some(m) = regex.find(&line) {
199 let context_before_vec: Vec<String> =
200 context_before.iter().map(|s: &String| s.trim_end().to_string()).collect();
201
202 let new_match = Match {
203 file_path: path.to_owned(),
204 line_number,
205 col: (m.start() + 1) as u32,
206 line_content: if options.count_only {
207 String::new()
208 } else {
209 trimmed_line.clone()
210 },
211 byte_offset: byte_offset + m.start() as u64,
212 context_before: context_before_vec,
213 context_after: vec![],
214 is_binary: false,
215 };
216
217 if options.context_lines > 0 {
218 pending_matches.push(new_match);
219 } else {
220 matches.push(new_match);
221 }
222
223 if options.max_results > 0
224 && (matches.len() + pending_matches.len()) >= options.max_results
225 && (pending_matches.is_empty() || matches.len() >= options.max_results)
226 {
227 break;
228 }
229 }
230
231 if options.context_lines > 0 {
232 context_before.push_back(line.clone());
233 if context_before.len() > options.context_lines {
234 context_before.pop_front();
235 }
236 }
237
238 byte_offset += line_len;
239 line.clear();
240 }
241
242 matches.extend(pending_matches);
243 Ok(matches)
244 }
245
246 fn scan_file(
247 &self,
248 path: &Path,
249 regex: &Regex,
250 options: &QueryOptions,
251 ) -> Result<Vec<Match>> {
252 let file = File::open(path)?;
253 let metadata = file.metadata()?;
254 if metadata.len() > 100 * 1024 * 1024 && !options.decompress {
255 return Ok(vec![]);
257 }
258
259 let mmap = unsafe { Mmap::map(&file)? };
260
261 if options.decompress
262 && let Some(reader) = maybe_decompress(path, &mmap)? {
263 return self.scan_stream(reader, path, regex, options);
264 }
265
266 self.scan_stream(Cursor::new(&mmap[..]), path, regex, options)
268 }
269}