1use std::collections::HashSet;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, Ordering};
5
6use grep_matcher::Matcher;
7use grep_regex::RegexMatcher;
8use grep_searcher::{Searcher, Sink, SinkMatch};
9use rayon::prelude::*;
10
11use crate::planner::TrigramPlan;
12use crate::Index;
13
14use super::{
15 CandidateInfo, CompiledSearch, FilenameMode, OutputEmission, SearchFilter, SearchMode,
16 SearchOutput,
17};
18
19#[cfg(test)]
20use super::{GlobConfig, HiddenMode, IgnoreConfig, Match, SearchFilterConfig, VisibilityConfig};
21
22impl CompiledSearch {
23 #[must_use]
26 pub fn candidate_file_ids(&self, index: &Index, exhaustive: bool) -> Vec<usize> {
27 if exhaustive {
28 return (0..index.file_count()).collect();
29 }
30 match &self.plan {
31 TrigramPlan::FullScan => (0..index.file_count()).collect(),
32 TrigramPlan::Narrow { arms } => index
33 .candidate_file_ids(arms.as_slice())
34 .into_iter()
35 .map(|id| id as usize)
36 .collect(),
37 }
38 }
39
40 pub fn run_index(
46 &self,
47 index: &Index,
48 filter: &SearchFilter,
49 output: SearchOutput,
50 ) -> crate::Result<bool> {
51 if self.opts.max_results == Some(0) {
52 return Err(crate::Error::InvalidMaxCount);
53 }
54
55 let raw_ids = self.candidate_file_ids(index, Self::uses_exhaustive_candidates(output.mode));
57 if raw_ids.is_empty() {
58 return Ok(false);
59 }
60
61 let threshold = parallel_candidate_min_files();
63 let candidates = Self::prepare_candidates(index, &raw_ids, filter, threshold);
64 if candidates.is_empty() {
65 return Ok(false);
66 }
67
68 let matcher = self.build_matcher()?;
70 let parallel = candidates.len() >= threshold;
71
72 match output.mode {
73 SearchMode::Standard | SearchMode::OnlyMatching => {
74 self.run_standard_with_info(&candidates, &matcher, output, parallel)
75 }
76 SearchMode::Count
77 | SearchMode::CountMatches
78 | SearchMode::FilesWithMatches
79 | SearchMode::FilesWithoutMatch => {
80 self.run_summary_with_info(&candidates, &matcher, output, parallel)
81 }
82 }
83 }
84
85 #[must_use]
87 pub fn prepare_candidates(
88 index: &Index,
89 ids: &[usize],
90 filter: &SearchFilter,
91 threshold: usize,
92 ) -> Vec<CandidateInfo> {
93 if ids.len() >= threshold {
94 ids.par_iter()
95 .filter_map(|&id| {
96 let rel_path = index.file_path(id)?.to_path_buf();
97 let rel_str = rel_path.to_string_lossy().replace('\\', "/");
98 let abs_path = index.root.join(&rel_path);
99 let info = CandidateInfo {
100 id,
101 rel_path,
102 rel_str,
103 abs_path,
104 };
105 filter.is_candidate_info(&info).then_some(info)
106 })
107 .collect()
108 } else {
109 ids.iter()
110 .filter_map(|&id| {
111 let rel_path = index.file_path(id)?.to_path_buf();
112 let rel_str = rel_path.to_string_lossy().replace('\\', "/");
113 let abs_path = index.root.join(&rel_path);
114 let info = CandidateInfo {
115 id,
116 rel_path,
117 rel_str,
118 abs_path,
119 };
120 filter.is_candidate_info(&info).then_some(info)
121 })
122 .collect()
123 }
124 }
125
126 fn run_standard_with_info(
127 &self,
128 candidates: &[CandidateInfo],
129 matcher: &RegexMatcher,
130 output: SearchOutput,
131 parallel: bool,
132 ) -> crate::Result<bool> {
133 if parallel {
134 let stop = AtomicBool::new(false);
135 let mut files = candidates
136 .par_iter()
137 .enumerate()
138 .map_init(
139 || StandardWorker::new(self, matcher.clone(), output),
140 |worker: &mut StandardWorker<'_>,
141 (result_index, candidate): (usize, &CandidateInfo)| {
142 worker.search_candidate(candidate, result_index, &stop)
143 },
144 )
145 .collect::<Vec<_>>();
146 files.sort_by_key(|file| file.index);
147 return flush_chunk_output(files.into_iter().map(|file| file.output));
148 }
149
150 self.run_standard_capped_with_info(candidates, matcher, output)
151 }
152
153 fn run_summary_with_info(
154 &self,
155 candidates: &[CandidateInfo],
156 matcher: &RegexMatcher,
157 output: SearchOutput,
158 parallel: bool,
159 ) -> crate::Result<bool> {
160 if parallel {
161 let stop = AtomicBool::new(false);
162 let mut files = candidates
163 .par_iter()
164 .enumerate()
165 .map_init(
166 || {
167 SummaryWorker::new(
168 self,
169 matcher.clone(),
170 self.opts.max_results,
171 output.mode,
172 )
173 },
174 |worker: &mut SummaryWorker,
175 (result_index, candidate): (usize, &CandidateInfo)| {
176 worker.search_candidate(&candidate.abs_path, result_index, output, &stop)
177 },
178 )
179 .collect::<Vec<_>>();
180 files.sort_by_key(|file| file.index);
181 return flush_chunk_output(files.into_iter().map(|file| file.output));
182 }
183
184 self.run_summary_capped_with_info(candidates, matcher, output)
185 }
186
187 fn run_standard_capped_with_info(
188 &self,
189 candidates: &[CandidateInfo],
190 matcher: &RegexMatcher,
191 output: SearchOutput,
192 ) -> crate::Result<bool> {
193 let mut any_match = false;
194 let mut out = Vec::new();
195 let mut searcher = self.build_searcher(output.line_number, self.opts.max_results);
196 for candidate in candidates {
197 let mut sink = StandardSink::new(matcher, output, &candidate.abs_path, &mut out);
198 let _ = searcher.search_path(matcher, &candidate.abs_path, &mut sink);
199 any_match |= sink.matched;
200 if output.emission == OutputEmission::Quiet && any_match {
201 break;
202 }
203 }
204
205 flush_chunk_output(std::iter::once(ChunkOutput {
206 bytes: out,
207 matched: any_match,
208 }))
209 }
210
211 fn run_summary_capped_with_info(
212 &self,
213 candidates: &[CandidateInfo],
214 matcher: &RegexMatcher,
215 output: SearchOutput,
216 ) -> crate::Result<bool> {
217 let mut any_match = false;
218 let mut out = Vec::new();
219 let mut worker =
220 SummaryWorker::new(self, matcher.clone(), self.opts.max_results, output.mode);
221 for candidate in candidates {
222 let result = worker.search_file(&candidate.abs_path);
223 any_match |= mode_is_success(output.mode, result);
224 write_summary_record(&mut out, output, &candidate.abs_path, result)?;
225 if output.emission == OutputEmission::Quiet && mode_is_success(output.mode, result) {
226 break;
227 }
228 }
229
230 flush_chunk_output(std::iter::once(ChunkOutput {
231 bytes: out,
232 matched: any_match,
233 }))
234 }
235
/// Test helper: collects the matches found through the index, using the
/// non-exhaustive candidate set and a filter with no scopes, default globs,
/// hidden files included and default ignore settings.
#[cfg(test)]
pub(crate) fn collect_index_matches(&self, index: &Index) -> crate::Result<Vec<Match>> {
    let visibility = VisibilityConfig {
        hidden: HiddenMode::Include,
        ignore: IgnoreConfig::default(),
    };
    let config = SearchFilterConfig {
        scopes: Vec::new(),
        glob: GlobConfig::default(),
        visibility,
    };
    let filter = SearchFilter::new(&config, &index.root)?;
    let ids = self.candidate_file_ids(index, false);
    self.collect_index_candidates(index, &filter, &ids)
}
252
/// Test helper: collects matches by walking the canonicalised `root`
/// directly instead of consulting an index.
///
/// The walk does not follow links and has hidden-file and ignore-file
/// filtering switched off. Only regular files are searched, and any path
/// with a `.sift` component is skipped.
#[cfg(test)]
pub(crate) fn collect_walk_matches(&self, root: &Path) -> crate::Result<Vec<Match>> {
    let root = root.canonicalize()?;
    let walker = ignore::WalkBuilder::new(&root)
        .follow_links(false)
        .hidden(false)
        .parents(false)
        .ignore(false)
        .git_global(false)
        .git_ignore(false)
        .git_exclude(false)
        .require_git(false)
        .build();

    let mut files = Vec::new();
    for entry in walker {
        let entry = entry.map_err(crate::Error::Ignore)?;
        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
            continue;
        }
        let path = entry.path();
        let inside_sift_dir = path.components().any(|c| c.as_os_str() == ".sift");
        if !inside_sift_dir {
            files.push(path.to_path_buf());
        }
    }
    self.collect_walk_candidates(&files)
}
279
/// Test helper: searches the indexed files named by `candidate_ids` and
/// returns every match, with line numbers enabled and no result cap.
///
/// Ids without a path in the index and paths rejected by `filter` are
/// skipped. Errors from searching an individual file are ignored.
#[cfg(test)]
fn collect_index_candidates(
    &self,
    index: &Index,
    filter: &SearchFilter,
    candidate_ids: &[usize],
) -> crate::Result<Vec<Match>> {
    let matcher = self.build_matcher()?;
    let mut searcher = self.build_searcher(true, None);
    let mut collected = Vec::new();

    for &id in candidate_ids {
        let rel = match index.file_path(id) {
            Some(rel) if filter.is_candidate(rel) => rel,
            _ => continue,
        };
        let abs = index.root.join(rel);
        let mut sink = CollectSink::new(abs.clone(), self.opts.only_matching(), matcher.clone());
        let _ = searcher.search_path(&matcher, &abs, &mut sink);
        collected.extend(sink.into_matches());
    }

    Ok(collected)
}
307
/// Test helper: searches every path in `candidates` and returns all matches,
/// with line numbers enabled and no result cap. Errors from searching an
/// individual file are ignored.
#[cfg(test)]
fn collect_walk_candidates(&self, candidates: &[PathBuf]) -> crate::Result<Vec<Match>> {
    let matcher = self.build_matcher()?;
    let only_matching = self.opts.only_matching();
    let mut searcher = self.build_searcher(true, None);
    let mut collected = Vec::new();

    for path in candidates {
        let mut sink = CollectSink::new(path.clone(), only_matching, matcher.clone());
        let _ = searcher.search_path(&matcher, path, &mut sink);
        collected.extend(sink.into_matches());
    }

    Ok(collected)
}
324}
325
326struct StandardWorker<'a> {
327 search: &'a CompiledSearch,
328 matcher: RegexMatcher,
329 output: SearchOutput,
330 bytes: Vec<u8>,
331}
332
333impl<'a> StandardWorker<'a> {
334 const fn new(search: &'a CompiledSearch, matcher: RegexMatcher, output: SearchOutput) -> Self {
335 Self {
336 search,
337 matcher,
338 output,
339 bytes: Vec::new(),
340 }
341 }
342
343 fn search_candidate(
344 &mut self,
345 candidate: &CandidateInfo,
346 result_index: usize,
347 stop: &AtomicBool,
348 ) -> FileResult {
349 self.bytes.clear();
350 if stop.load(Ordering::SeqCst) {
351 return FileResult {
352 index: result_index,
353 output: ChunkOutput::empty(),
354 };
355 }
356
357 let matched = {
358 let mut searcher = self
359 .search
360 .build_searcher(self.output.line_number, self.search.opts.max_results);
361 let mut sink = StandardSink::new(
362 &self.matcher,
363 self.output,
364 &candidate.abs_path,
365 &mut self.bytes,
366 );
367 let _ = searcher.search_path(&self.matcher, &candidate.abs_path, &mut sink);
368 sink.matched
369 };
370
371 if self.output.emission == OutputEmission::Quiet && matched {
372 stop.store(true, Ordering::SeqCst);
373 }
374
375 FileResult {
377 index: result_index,
378 output: ChunkOutput {
379 bytes: std::mem::take(&mut self.bytes),
380 matched,
381 },
382 }
383 }
384}
385
386struct StandardSink<'a> {
387 matcher: &'a RegexMatcher,
388 output: SearchOutput,
389 path: &'a Path,
390 bytes: &'a mut Vec<u8>,
391 matched: bool,
392 match_count: usize,
393}
394
395impl<'a> StandardSink<'a> {
396 const fn new(
397 matcher: &'a RegexMatcher,
398 output: SearchOutput,
399 path: &'a Path,
400 bytes: &'a mut Vec<u8>,
401 ) -> Self {
402 Self {
403 matcher,
404 output,
405 path,
406 bytes,
407 matched: false,
408 match_count: 0,
409 }
410 }
411}
412
413impl Sink for StandardSink<'_> {
414 type Error = io::Error;
415
416 fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
417 self.matched = true;
418 self.match_count += 1;
419
420 if self.output.emission == OutputEmission::Quiet {
421 return Ok(true);
422 }
423
424 if matches!(self.output.mode, SearchMode::OnlyMatching) {
425 let line_number = mat.line_number();
426 let line = mat.bytes();
427 let _ = self.matcher.find_iter(line, |m: grep_matcher::Match| {
428 let _ = write_standard_prefix(self.bytes, self.output, self.path, line_number);
429 let _ = self.bytes.write_all(&line[m.start()..m.end()]);
430 let _ = self.bytes.write_all(b"\n");
431 true
432 });
433 return Ok(true);
434 }
435
436 write_standard_prefix(self.bytes, self.output, self.path, mat.line_number())?;
437 self.bytes.write_all(mat.bytes())?;
438 if !mat.bytes().ends_with(b"\n") {
439 self.bytes.write_all(b"\n")?;
440 }
441 Ok(true)
442 }
443}
444
445struct SummaryWorker {
446 matcher: RegexMatcher,
447 searcher: Searcher,
448 mode: SearchMode,
449}
450
451impl SummaryWorker {
452 fn new(
453 search: &CompiledSearch,
454 matcher: RegexMatcher,
455 max_results: Option<usize>,
456 mode: SearchMode,
457 ) -> Self {
458 Self {
459 searcher: search.build_searcher(false, max_results),
460 matcher,
461 mode,
462 }
463 }
464
465 fn search_file(&mut self, path: &Path) -> FileSummary {
466 let sink_matcher = if self.mode == SearchMode::CountMatches {
467 Some(self.matcher.clone())
468 } else {
469 None
470 };
471 let mut sink = SummarySink::new(self.mode, sink_matcher);
472 let _ = self.searcher.search_path(&self.matcher, path, &mut sink);
473 sink.finish()
474 }
475
476 fn search_candidate(
477 &mut self,
478 path: &Path,
479 result_index: usize,
480 output: SearchOutput,
481 stop: &AtomicBool,
482 ) -> FileResult {
483 if stop.load(Ordering::SeqCst) {
484 return FileResult {
485 index: result_index,
486 output: ChunkOutput::empty(),
487 };
488 }
489
490 let result = self.search_file(path);
491 let matched = mode_is_success(output.mode, result);
492 let mut bytes = Vec::new();
493 let _ = write_summary_record(&mut bytes, output, path, result);
494 if output.emission == OutputEmission::Quiet && mode_is_success(output.mode, result) {
495 stop.store(true, Ordering::SeqCst);
496 }
497
498 FileResult {
499 index: result_index,
500 output: ChunkOutput { bytes, matched },
501 }
502 }
503}
504
/// Output of searching one candidate, tagged with the candidate's position
/// so the parallel runners can order results before writing them.
struct FileResult {
    /// Position of the candidate in the slice handed to the runner.
    index: usize,
    /// Rendered bytes and match flag for that candidate.
    output: ChunkOutput,
}
509
/// A piece of rendered output together with the match flag it contributes
/// to the overall result.
struct ChunkOutput {
    /// Bytes to be written to stdout; may be empty.
    bytes: Vec<u8>,
    /// Whether this chunk counts as a match.
    matched: bool,
}

impl ChunkOutput {
    /// Returns a chunk with no bytes that does not count as a match.
    const fn empty() -> Self {
        let bytes = Vec::new();
        Self {
            matched: false,
            bytes,
        }
    }
}
523
524fn flush_chunk_output(outputs: impl IntoIterator<Item = ChunkOutput>) -> crate::Result<bool> {
525 let mut stdout = io::stdout().lock();
526 let mut any_match = false;
527 for output in outputs {
528 any_match |= output.matched;
529 if output.bytes.is_empty() {
530 continue;
531 }
532 stdout.write_all(&output.bytes)?;
533 }
534 Ok(any_match)
535}
536
/// Per-file result of a summary-mode search, as produced by
/// `SummarySink::finish`.
#[derive(Clone, Copy)]
struct FileSummary {
    /// Whether the searcher reported at least one match for the file.
    matched: bool,
    /// Value accumulated by the sink: individual matches in count-matches
    /// mode, match reports otherwise.
    count: usize,
}
542
543struct SummarySink {
544 mode: SearchMode,
545 matcher: Option<RegexMatcher>,
546 matched: bool,
547 count: usize,
548}
549
550impl SummarySink {
551 const fn new(mode: SearchMode, matcher: Option<RegexMatcher>) -> Self {
552 Self {
553 mode,
554 matcher,
555 matched: false,
556 count: 0,
557 }
558 }
559
560 fn finish(self) -> FileSummary {
561 FileSummary {
562 matched: self.matched,
563 count: self.count,
564 }
565 }
566}
567
568impl Sink for SummarySink {
569 type Error = io::Error;
570
571 fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
572 self.matched = true;
573 if self.mode == SearchMode::CountMatches {
574 if let Some(ref matcher) = self.matcher {
575 let line = mat.bytes();
576 let mut n = 0;
577 let _ = matcher.find_iter(line, |_| {
578 n += 1;
579 true
580 });
581 self.count += n;
582 }
583 } else {
584 self.count += 1;
585 }
586 Ok(matches!(
587 self.mode,
588 SearchMode::Count | SearchMode::CountMatches
589 ))
590 }
591}
592
593fn write_summary_record(
594 out: &mut Vec<u8>,
595 output: SearchOutput,
596 path: &Path,
597 result: FileSummary,
598) -> io::Result<()> {
599 if output.emission == OutputEmission::Quiet {
600 return Ok(());
601 }
602 match output.mode {
603 SearchMode::Count | SearchMode::CountMatches => {
604 if result.count == 0 {
605 return Ok(());
606 }
607 let print_filename = output.filename_mode != FilenameMode::Never;
608 if print_filename {
609 writeln!(out, "{}:{}", path.display(), result.count)
610 } else {
611 writeln!(out, "{}", result.count)
612 }
613 }
614 SearchMode::FilesWithMatches => {
615 if result.matched {
616 writeln!(out, "{}", path.display())
617 } else {
618 Ok(())
619 }
620 }
621 SearchMode::FilesWithoutMatch => {
622 if result.matched {
623 Ok(())
624 } else {
625 writeln!(out, "{}", path.display())
626 }
627 }
628 SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
629 }
630}
631
632fn write_standard_prefix(
633 out: &mut Vec<u8>,
634 output: SearchOutput,
635 path: &Path,
636 line_number: Option<u64>,
637) -> io::Result<()> {
638 let print_filename = output.filename_mode != FilenameMode::Never;
639 if print_filename {
640 write!(out, "{}:", path.display())?;
641 }
642 if output.line_number {
643 write!(out, "{}:", line_number.unwrap_or(0))?;
644 }
645 Ok(())
646}
647
648#[allow(clippy::match_same_arms)]
649const fn mode_is_success(mode: SearchMode, result: FileSummary) -> bool {
650 match mode {
651 SearchMode::Count | SearchMode::CountMatches => result.count > 0,
652 SearchMode::FilesWithMatches => result.matched,
653 SearchMode::FilesWithoutMatch => !result.matched,
654 SearchMode::Standard | SearchMode::OnlyMatching => result.matched,
655 }
656}
657
658pub fn walk_file_paths(root: &Path) -> crate::Result<HashSet<PathBuf>> {
662 let root = root.canonicalize()?;
663 let mut set = HashSet::new();
664 let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
665 for entry in walker {
666 let entry = entry.map_err(crate::Error::Ignore)?;
667 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
668 continue;
669 }
670 let path = entry.path();
671 let display = path.strip_prefix(&root).unwrap_or(path).to_path_buf();
672 set.insert(display);
673 }
674 Ok(set)
675}
676
/// Returns the number of candidates at which callers switch to the rayon
/// pool.
///
/// The worker count is the available parallelism (1 if it cannot be
/// determined), capped by `RAYON_NUM_THREADS` when that variable holds a
/// positive integer. With a single worker the result is `usize::MAX`;
/// otherwise it is eight times the worker count, saturating on overflow.
pub fn parallel_candidate_min_files() -> usize {
    let cpus = match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    };

    // Unset, non-unicode, unparsable and zero values all mean "no cap".
    let requested = match std::env::var("RAYON_NUM_THREADS") {
        Ok(raw) => raw.parse::<usize>().ok().filter(|&n| n != 0),
        Err(_) => None,
    };

    let workers = match requested {
        Some(limit) => cpus.min(limit),
        None => cpus,
    };

    match workers {
        0 | 1 => usize::MAX,
        many => many.saturating_mul(8),
    }
}
694
/// Test-only sink that stores every match of one file as a [`Match`].
#[cfg(test)]
struct CollectSink {
    /// Path recorded in each collected match.
    path: PathBuf,
    /// When set, one entry is stored per match inside a line instead of one
    /// entry per reported line.
    only_matching: bool,
    /// Matcher used to find the individual matches inside a line.
    matcher: RegexMatcher,
    /// Matches collected so far.
    matches: Vec<Match>,
}

#[cfg(test)]
impl CollectSink {
    /// Creates a sink for `path` with no collected matches.
    fn new(path: PathBuf, only_matching: bool, matcher: RegexMatcher) -> Self {
        let matches = Vec::new();
        Self {
            path,
            only_matching,
            matcher,
            matches,
        }
    }

    /// Consumes the sink and returns the collected matches.
    fn into_matches(self) -> Vec<Match> {
        let Self { matches, .. } = self;
        matches
    }
}

#[cfg(test)]
impl grep_searcher::Sink for CollectSink {
    type Error = io::Error;

    /// Stores the reported match and always asks the searcher to continue.
    ///
    /// The line number falls back to `0` when it is missing or does not fit
    /// into `usize`. Text is converted lossily to UTF-8.
    fn matched(
        &mut self,
        _: &grep_searcher::Searcher,
        mat: &grep_searcher::SinkMatch<'_>,
    ) -> Result<bool, Self::Error> {
        let line = mat
            .line_number()
            .and_then(|number| usize::try_from(number).ok())
            .unwrap_or(0);
        let line_bytes = mat.bytes();

        if !self.only_matching {
            self.matches.push(Match {
                file: self.path.clone(),
                line,
                text: String::from_utf8_lossy(line_bytes).into_owned(),
            });
            return Ok(true);
        }

        let _ = self
            .matcher
            .find_iter(line_bytes, |m: grep_matcher::Match| {
                let fragment = &line_bytes[m.start()..m.end()];
                self.matches.push(Match {
                    file: self.path.clone(),
                    line,
                    text: String::from_utf8_lossy(fragment).into_owned(),
                });
                true
            });
        Ok(true)
    }
}