1use std::collections::HashSet;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, Ordering};
5
6use grep_matcher::Matcher;
7use grep_regex::RegexMatcher;
8use grep_searcher::{Searcher, Sink, SinkMatch};
9use rayon::prelude::*;
10
11use crate::planner::TrigramPlan;
12use crate::Index;
13
14use super::{CompiledSearch, SearchMode, SearchOutput};
15
16#[cfg(test)]
17use super::Match;
18
19impl CompiledSearch {
20 #[must_use]
21 pub fn candidate_file_ids(
22 &self,
23 index: &Index,
24 prefixes: &[PathBuf],
25 exhaustive: bool,
26 ) -> Vec<usize> {
27 let ids: Vec<usize> = if exhaustive {
28 (0..index.file_count()).collect()
29 } else {
30 match &self.plan {
31 TrigramPlan::FullScan => (0..index.file_count()).collect(),
32 TrigramPlan::Narrow { arms } => index
33 .candidate_file_ids(arms.as_slice())
34 .into_iter()
35 .map(|id| id as usize)
36 .collect(),
37 }
38 };
39
40 ids.into_iter()
41 .filter(|&id| {
42 index
43 .file_path(id)
44 .is_some_and(|rel| path_in_scope(rel, prefixes))
45 })
46 .collect()
47 }
48
49 pub fn run_index(
55 &self,
56 index: &Index,
57 prefixes: &[PathBuf],
58 output: SearchOutput,
59 ) -> crate::Result<bool> {
60 let candidate_ids = self.candidate_file_ids(
61 index,
62 prefixes,
63 Self::uses_exhaustive_candidates(output.mode),
64 );
65 if candidate_ids.is_empty() {
66 return Ok(false);
67 }
68
69 let matcher = self.build_matcher()?;
70 let parallel = self.opts.max_results.is_none()
71 && candidate_ids.len() >= parallel_candidate_min_files();
72 match output.mode {
73 SearchMode::Standard | SearchMode::OnlyMatching => {
74 self.run_standard(index, &candidate_ids, &matcher, output, parallel)
75 }
76 SearchMode::Count
77 | SearchMode::FilesWithMatches
78 | SearchMode::FilesWithoutMatch
79 | SearchMode::Quiet => {
80 self.run_summary(index, &candidate_ids, &matcher, output, parallel)
81 }
82 }
83 }
84
85 fn run_standard(
86 &self,
87 index: &Index,
88 candidate_ids: &[usize],
89 matcher: &RegexMatcher,
90 output: SearchOutput,
91 parallel: bool,
92 ) -> crate::Result<bool> {
93 if parallel {
94 let stop = AtomicBool::new(false);
95 let mut files = candidate_ids
96 .par_iter()
97 .enumerate()
98 .map_init(
99 || StandardWorker::new(self, matcher.clone(), output),
100 |worker, (result_index, &id)| {
101 worker.search_candidate(index, result_index, id, &stop)
102 },
103 )
104 .collect::<Vec<_>>();
105 files.sort_by_key(|file| file.index);
106 return flush_chunk_output(files.into_iter().map(|file| file.output));
107 }
108
109 self.run_standard_capped(index, candidate_ids, matcher, output)
110 }
111
112 fn run_summary(
113 &self,
114 index: &Index,
115 candidate_ids: &[usize],
116 matcher: &RegexMatcher,
117 output: SearchOutput,
118 parallel: bool,
119 ) -> crate::Result<bool> {
120 if parallel {
121 let stop = AtomicBool::new(false);
122 let mut files = candidate_ids
123 .par_iter()
124 .enumerate()
125 .map_init(
126 || SummaryWorker::new(self, matcher.clone()),
127 |worker, (result_index, &id)| {
128 worker.search_candidate(index, result_index, id, output, &stop)
129 },
130 )
131 .collect::<Vec<_>>();
132 files.sort_by_key(|file| file.index);
133 return flush_chunk_output(files.into_iter().map(|file| file.output));
134 }
135
136 self.run_summary_capped(index, candidate_ids, matcher, output)
137 }
138
139 fn run_standard_capped(
140 &self,
141 index: &Index,
142 candidate_ids: &[usize],
143 matcher: &RegexMatcher,
144 output: SearchOutput,
145 ) -> crate::Result<bool> {
146 let mut any_match = false;
147 let mut remaining = self.opts.max_results;
148 let mut out = Vec::new();
149 let mut actual = index.root.clone();
150 for &id in candidate_ids {
151 let mut searcher = self.build_searcher(output.line_number, remaining);
152 let Some(candidate) = index.file_path(id) else {
153 continue;
154 };
155 actual.push(candidate);
156 let depth = candidate.components().count();
157 let mut sink = StandardSink::new(matcher, output, &actual, &mut out);
158 let _ = searcher.search_path(matcher, &actual, &mut sink);
159 any_match |= sink.matched;
160 let used = sink.match_count;
161 for _ in 0..depth {
162 actual.pop();
163 }
164 if let Some(ref mut left) = remaining {
165 *left = left.saturating_sub(used);
166 if *left == 0 {
167 break;
168 }
169 }
170 }
171
172 flush_chunk_output(std::iter::once(ChunkOutput {
173 bytes: out,
174 matched: any_match,
175 }))
176 }
177
178 fn run_summary_capped(
179 &self,
180 index: &Index,
181 candidate_ids: &[usize],
182 matcher: &RegexMatcher,
183 output: SearchOutput,
184 ) -> crate::Result<bool> {
185 let mut any_match = false;
186 let mut remaining = self.opts.max_results;
187 let mut out = Vec::new();
188 let mut worker = SummaryWorker::new(self, matcher.clone());
189 let mut actual = index.root.clone();
190 for &id in candidate_ids {
191 let Some(candidate) = index.file_path(id) else {
192 continue;
193 };
194 actual.push(candidate);
195 let depth = candidate.components().count();
196 let result = worker.search_file(&actual, output.mode);
197 any_match |= mode_is_success(output.mode, result);
198 write_summary_record(&mut out, output, &actual, result)?;
199 for _ in 0..depth {
200 actual.pop();
201 }
202 if let Some(ref mut left) = remaining {
203 *left = left.saturating_sub(usize::from(result.matched));
204 if *left == 0 {
205 break;
206 }
207 }
208 if matches!(output.mode, SearchMode::Quiet) && result.matched {
209 break;
210 }
211 }
212
213 flush_chunk_output(std::iter::once(ChunkOutput {
214 bytes: out,
215 matched: any_match,
216 }))
217 }
218
219 #[cfg(test)]
220 pub(crate) fn collect_index_matches(&self, index: &Index) -> crate::Result<Vec<Match>> {
221 let candidate_ids = self.candidate_file_ids(index, &[], false);
222 self.collect_index_candidates(index, &candidate_ids)
223 }
224
225 #[cfg(test)]
226 pub(crate) fn collect_walk_matches(&self, root: &Path) -> crate::Result<Vec<Match>> {
227 let root = root.canonicalize()?;
228 let mut candidates = Vec::new();
229 let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
230 for entry in walker {
231 let entry = entry.map_err(crate::Error::Ignore)?;
232 if entry.file_type().is_some_and(|ft| ft.is_file()) {
233 candidates.push(entry.path().to_path_buf());
234 }
235 }
236 self.collect_walk_candidates(&candidates)
237 }
238
239 #[cfg(test)]
240 fn collect_index_candidates(
241 &self,
242 index: &Index,
243 candidate_ids: &[usize],
244 ) -> crate::Result<Vec<Match>> {
245 let matcher = self.build_matcher()?;
246 let mut searcher = self.build_searcher(true, None);
247 let mut out = Vec::new();
248 let mut actual = index.root.clone();
249 for &id in candidate_ids {
250 let Some(candidate) = index.file_path(id) else {
251 continue;
252 };
253 actual.push(candidate);
254 let depth = candidate.components().count();
255 let mut sink =
256 CollectSink::new(actual.clone(), self.opts.only_matching(), matcher.clone());
257 let _ = searcher.search_path(&matcher, &actual, &mut sink);
258 for _ in 0..depth {
259 actual.pop();
260 }
261 out.extend(sink.into_matches());
262 }
263 Ok(out)
264 }
265
266 #[cfg(test)]
267 fn collect_walk_candidates(&self, candidates: &[PathBuf]) -> crate::Result<Vec<Match>> {
268 let matcher = self.build_matcher()?;
269 let mut searcher = self.build_searcher(true, None);
270 let mut out = Vec::new();
271 for candidate in candidates {
272 let mut sink = CollectSink::new(
273 candidate.clone(),
274 self.opts.only_matching(),
275 matcher.clone(),
276 );
277 let _ = searcher.search_path(&matcher, candidate, &mut sink);
278 out.extend(sink.into_matches());
279 }
280 Ok(out)
281 }
282}
283
284struct StandardWorker {
285 matcher: RegexMatcher,
286 searcher: Searcher,
287 output: SearchOutput,
288 bytes: Vec<u8>,
289}
290
291impl StandardWorker {
292 fn new(search: &CompiledSearch, matcher: RegexMatcher, output: SearchOutput) -> Self {
293 Self {
294 searcher: search.build_searcher(output.line_number, None),
295 matcher,
296 output,
297 bytes: Vec::new(),
298 }
299 }
300
301 fn search_candidate(
302 &mut self,
303 index: &Index,
304 result_index: usize,
305 id: usize,
306 stop: &AtomicBool,
307 ) -> FileResult {
308 self.bytes.clear();
309 if stop.load(Ordering::SeqCst) {
310 return FileResult {
311 index: result_index,
312 output: ChunkOutput {
313 bytes: Vec::new(),
314 matched: false,
315 },
316 };
317 }
318
319 let Some(candidate) = index.file_path(id) else {
320 return FileResult {
321 index: result_index,
322 output: ChunkOutput {
323 bytes: Vec::new(),
324 matched: false,
325 },
326 };
327 };
328 let actual = index.root.join(candidate);
329 let matched = {
330 let mut sink = StandardSink::new(&self.matcher, self.output, &actual, &mut self.bytes);
331 let _ = self.searcher.search_path(&self.matcher, &actual, &mut sink);
332 sink.matched
333 };
334
335 FileResult {
336 index: result_index,
337 output: ChunkOutput {
338 bytes: self.bytes.clone(),
339 matched,
340 },
341 }
342 }
343}
344
345struct StandardSink<'a> {
346 matcher: &'a RegexMatcher,
347 output: SearchOutput,
348 path: &'a Path,
349 bytes: &'a mut Vec<u8>,
350 matched: bool,
351 match_count: usize,
352}
353
354impl<'a> StandardSink<'a> {
355 const fn new(
356 matcher: &'a RegexMatcher,
357 output: SearchOutput,
358 path: &'a Path,
359 bytes: &'a mut Vec<u8>,
360 ) -> Self {
361 Self {
362 matcher,
363 output,
364 path,
365 bytes,
366 matched: false,
367 match_count: 0,
368 }
369 }
370}
371
372impl Sink for StandardSink<'_> {
373 type Error = io::Error;
374
375 fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
376 self.matched = true;
377 self.match_count += 1;
378
379 if matches!(self.output.mode, SearchMode::OnlyMatching) {
380 let line_number = mat.line_number();
381 let line = mat.bytes();
382 let _ = self.matcher.find_iter(line, |m: grep_matcher::Match| {
383 let _ = write_standard_prefix(self.bytes, self.output, self.path, line_number);
384 let _ = self.bytes.write_all(&line[m.start()..m.end()]);
385 let _ = self.bytes.write_all(b"\n");
386 true
387 });
388 return Ok(true);
389 }
390
391 write_standard_prefix(self.bytes, self.output, self.path, mat.line_number())?;
392 self.bytes.write_all(mat.bytes())?;
393 if !mat.bytes().ends_with(b"\n") {
394 self.bytes.write_all(b"\n")?;
395 }
396 Ok(true)
397 }
398}
399
400struct SummaryWorker {
401 matcher: RegexMatcher,
402 searcher: Searcher,
403}
404
405impl SummaryWorker {
406 fn new(search: &CompiledSearch, matcher: RegexMatcher) -> Self {
407 Self {
408 searcher: search.build_searcher(false, None),
409 matcher,
410 }
411 }
412
413 fn search_file(&mut self, path: &Path, mode: SearchMode) -> FileSummary {
414 let mut sink = SummarySink::new(mode);
415 let _ = self.searcher.search_path(&self.matcher, path, &mut sink);
416 sink.finish()
417 }
418
419 fn search_candidate(
420 &mut self,
421 index: &Index,
422 result_index: usize,
423 id: usize,
424 output: SearchOutput,
425 stop: &AtomicBool,
426 ) -> FileResult {
427 if stop.load(Ordering::SeqCst) {
428 return FileResult {
429 index: result_index,
430 output: ChunkOutput {
431 bytes: Vec::new(),
432 matched: false,
433 },
434 };
435 }
436
437 let Some(candidate) = index.file_path(id) else {
438 return FileResult {
439 index: result_index,
440 output: ChunkOutput {
441 bytes: Vec::new(),
442 matched: false,
443 },
444 };
445 };
446
447 let actual = index.root.join(candidate);
448 let result = self.search_file(&actual, output.mode);
449 let matched = mode_is_success(output.mode, result);
450 let mut bytes = Vec::new();
451 let _ = write_summary_record(&mut bytes, output, &actual, result);
452 if matches!(output.mode, SearchMode::Quiet) && result.matched {
453 stop.store(true, Ordering::SeqCst);
454 }
455
456 FileResult {
457 index: result_index,
458 output: ChunkOutput { bytes, matched },
459 }
460 }
461}
462
463struct FileResult {
464 index: usize,
465 output: ChunkOutput,
466}
467
468struct ChunkOutput {
469 bytes: Vec<u8>,
470 matched: bool,
471}
472
473fn flush_chunk_output(outputs: impl IntoIterator<Item = ChunkOutput>) -> crate::Result<bool> {
474 let mut stdout = io::stdout().lock();
475 let mut any_match = false;
476 for output in outputs {
477 any_match |= output.matched;
478 if output.bytes.is_empty() {
479 continue;
480 }
481 stdout.write_all(&output.bytes)?;
482 }
483 Ok(any_match)
484}
485
486#[derive(Clone, Copy)]
487struct FileSummary {
488 matched: bool,
489 count: usize,
490}
491
492struct SummarySink {
493 mode: SearchMode,
494 matched: bool,
495 count: usize,
496}
497
498impl SummarySink {
499 const fn new(mode: SearchMode) -> Self {
500 Self {
501 mode,
502 matched: false,
503 count: 0,
504 }
505 }
506
507 const fn finish(self) -> FileSummary {
508 FileSummary {
509 matched: self.matched,
510 count: self.count,
511 }
512 }
513}
514
515impl Sink for SummarySink {
516 type Error = io::Error;
517
518 fn matched(&mut self, _: &Searcher, _: &SinkMatch<'_>) -> Result<bool, Self::Error> {
519 self.matched = true;
520 self.count += 1;
521 Ok(matches!(self.mode, SearchMode::Count))
522 }
523}
524
525fn write_summary_record(
526 out: &mut Vec<u8>,
527 output: SearchOutput,
528 path: &Path,
529 result: FileSummary,
530) -> io::Result<()> {
531 match output.mode {
532 SearchMode::Count => {
533 if output.with_filename {
534 writeln!(out, "{}:{}", path.display(), result.count)
535 } else {
536 writeln!(out, "{}", result.count)
537 }
538 }
539 SearchMode::FilesWithMatches => {
540 if result.matched && output.with_filename {
541 writeln!(out, "{}", path.display())
542 } else {
543 Ok(())
544 }
545 }
546 SearchMode::FilesWithoutMatch => {
547 if !result.matched && output.with_filename {
548 writeln!(out, "{}", path.display())
549 } else {
550 Ok(())
551 }
552 }
553 SearchMode::Quiet => Ok(()),
554 SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
555 }
556}
557
558fn write_standard_prefix(
559 out: &mut Vec<u8>,
560 output: SearchOutput,
561 path: &Path,
562 line_number: Option<u64>,
563) -> io::Result<()> {
564 if output.with_filename {
565 write!(out, "{}:", path.display())?;
566 }
567 if output.line_number {
568 write!(out, "{}:", line_number.unwrap_or(0))?;
569 }
570 Ok(())
571}
572
573const fn mode_is_success(mode: SearchMode, result: FileSummary) -> bool {
574 match mode {
575 SearchMode::Count | SearchMode::FilesWithMatches | SearchMode::Quiet => result.matched,
576 SearchMode::FilesWithoutMatch => !result.matched,
577 SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
578 }
579}
580
581fn path_in_scope(rel: &Path, prefixes: &[PathBuf]) -> bool {
582 if prefixes.is_empty() {
583 return true;
584 }
585 prefixes
586 .iter()
587 .any(|pre| rel.starts_with(pre) || rel.as_os_str() == pre.as_os_str())
588}
589
590pub fn walk_file_paths(root: &Path) -> crate::Result<HashSet<PathBuf>> {
596 let root = root.canonicalize()?;
597 let mut set = HashSet::new();
598 let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
599 for entry in walker {
600 let entry = entry.map_err(crate::Error::Ignore)?;
601 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
602 continue;
603 }
604 let path = entry.path();
605 let display = path.strip_prefix(&root).unwrap_or(path).to_path_buf();
606 set.insert(display);
607 }
608 Ok(set)
609}
610
611pub fn parallel_candidate_min_files() -> usize {
612 let cpus = std::thread::available_parallelism()
613 .map(std::num::NonZeroUsize::get)
614 .unwrap_or(1);
615 let rayon_threads = std::env::var("RAYON_NUM_THREADS")
616 .ok()
617 .and_then(|s| s.parse::<usize>().ok());
618 let effective = rayon_threads
619 .filter(|&n| n > 0)
620 .map_or(cpus, |rt| rt.min(cpus))
621 .max(1);
622 if effective <= 1 {
623 usize::MAX
624 } else {
625 effective.saturating_mul(8)
626 }
627}
628
629#[cfg(test)]
630struct CollectSink {
631 path: PathBuf,
632 only_matching: bool,
633 matcher: RegexMatcher,
634 matches: Vec<Match>,
635}
636
637#[cfg(test)]
638impl CollectSink {
639 fn new(path: PathBuf, only_matching: bool, matcher: RegexMatcher) -> Self {
640 Self {
641 path,
642 only_matching,
643 matcher,
644 matches: Vec::new(),
645 }
646 }
647
648 fn into_matches(self) -> Vec<Match> {
649 self.matches
650 }
651}
652
653#[cfg(test)]
654impl grep_searcher::Sink for CollectSink {
655 type Error = io::Error;
656
657 fn matched(
658 &mut self,
659 _: &grep_searcher::Searcher,
660 mat: &grep_searcher::SinkMatch<'_>,
661 ) -> Result<bool, Self::Error> {
662 let line = usize::try_from(mat.line_number().unwrap_or(0)).unwrap_or(0);
663 let line_bytes = mat.bytes();
664 if self.only_matching {
665 let _ = self
666 .matcher
667 .find_iter(line_bytes, |m: grep_matcher::Match| {
668 self.matches.push(Match {
669 file: self.path.clone(),
670 line,
671 text: String::from_utf8_lossy(&line_bytes[m.start()..m.end()]).into_owned(),
672 });
673 true
674 });
675 } else {
676 self.matches.push(Match {
677 file: self.path.clone(),
678 line,
679 text: String::from_utf8_lossy(line_bytes).into_owned(),
680 });
681 }
682 Ok(true)
683 }
684}