Skip to main content

sift_core/search/
execute.rs

1use std::collections::HashSet;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, Ordering};
5
6use grep_matcher::Matcher;
7use grep_regex::RegexMatcher;
8use grep_searcher::{Searcher, Sink, SinkMatch};
9use rayon::prelude::*;
10
11use crate::planner::TrigramPlan;
12use crate::Index;
13
14use super::{
15    CandidateInfo, CompiledSearch, FilenameMode, OutputEmission, SearchFilter, SearchMode,
16    SearchOutput,
17};
18
19#[cfg(test)]
20use super::{GlobConfig, HiddenMode, IgnoreConfig, Match, SearchFilterConfig, VisibilityConfig};
21
22impl CompiledSearch {
23    /// Returns raw candidate file IDs from index (trigram or full scan).
24    /// Does NOT apply `SearchFilter` - filtering happens in `prepare_candidates`.
25    #[must_use]
26    pub fn candidate_file_ids(&self, index: &Index, exhaustive: bool) -> Vec<usize> {
27        if exhaustive {
28            return (0..index.file_count()).collect();
29        }
30        match &self.plan {
31            TrigramPlan::FullScan => (0..index.file_count()).collect(),
32            TrigramPlan::Narrow { arms } => index
33                .candidate_file_ids(arms.as_slice())
34                .into_iter()
35                .map(|id| id as usize)
36                .collect(),
37        }
38    }
39
40    /// Execute a search over an opened index and print results to stdout.
41    ///
42    /// # Errors
43    ///
44    /// Returns an error if the matcher cannot be built or stdout cannot be written.
45    pub fn run_index(
46        &self,
47        index: &Index,
48        filter: &SearchFilter,
49        output: SearchOutput,
50    ) -> crate::Result<bool> {
51        if self.opts.max_results == Some(0) {
52            return Err(crate::Error::InvalidMaxCount);
53        }
54
55        // Stage 1: Get raw candidate IDs from index (trigram or full scan)
56        let raw_ids = self.candidate_file_ids(index, Self::uses_exhaustive_candidates(output.mode));
57        if raw_ids.is_empty() {
58            return Ok(false);
59        }
60
61        // Stage 2+3: Parallel filter + prepare CandidateInfo (single filter pass)
62        let threshold = parallel_candidate_min_files();
63        let candidates = Self::prepare_candidates(index, &raw_ids, filter, threshold);
64        if candidates.is_empty() {
65            return Ok(false);
66        }
67
68        // Stage 4: Build matcher and search
69        let matcher = self.build_matcher()?;
70        let parallel = candidates.len() >= threshold;
71
72        match output.mode {
73            SearchMode::Standard | SearchMode::OnlyMatching => {
74                self.run_standard_with_info(&candidates, &matcher, output, parallel)
75            }
76            SearchMode::Count
77            | SearchMode::CountMatches
78            | SearchMode::FilesWithMatches
79            | SearchMode::FilesWithoutMatch => {
80                self.run_summary_with_info(&candidates, &matcher, output, parallel)
81            }
82        }
83    }
84
85    /// Prepare `CandidateInfo` with parallel filter + path prep.
86    #[must_use]
87    pub fn prepare_candidates(
88        index: &Index,
89        ids: &[usize],
90        filter: &SearchFilter,
91        threshold: usize,
92    ) -> Vec<CandidateInfo> {
93        if ids.len() >= threshold {
94            ids.par_iter()
95                .filter_map(|&id| {
96                    let rel_path = index.file_path(id)?.to_path_buf();
97                    let rel_str = rel_path.to_string_lossy().replace('\\', "/");
98                    let abs_path = index.root.join(&rel_path);
99                    let info = CandidateInfo {
100                        id,
101                        rel_path,
102                        rel_str,
103                        abs_path,
104                    };
105                    filter.is_candidate_info(&info).then_some(info)
106                })
107                .collect()
108        } else {
109            ids.iter()
110                .filter_map(|&id| {
111                    let rel_path = index.file_path(id)?.to_path_buf();
112                    let rel_str = rel_path.to_string_lossy().replace('\\', "/");
113                    let abs_path = index.root.join(&rel_path);
114                    let info = CandidateInfo {
115                        id,
116                        rel_path,
117                        rel_str,
118                        abs_path,
119                    };
120                    filter.is_candidate_info(&info).then_some(info)
121                })
122                .collect()
123        }
124    }
125
126    fn run_standard_with_info(
127        &self,
128        candidates: &[CandidateInfo],
129        matcher: &RegexMatcher,
130        output: SearchOutput,
131        parallel: bool,
132    ) -> crate::Result<bool> {
133        if parallel {
134            let stop = AtomicBool::new(false);
135            let mut files = candidates
136                .par_iter()
137                .enumerate()
138                .map_init(
139                    || StandardWorker::new(self, matcher.clone(), output),
140                    |worker: &mut StandardWorker<'_>,
141                     (result_index, candidate): (usize, &CandidateInfo)| {
142                        worker.search_candidate(candidate, result_index, &stop)
143                    },
144                )
145                .collect::<Vec<_>>();
146            files.sort_by_key(|file| file.index);
147            return flush_chunk_output(files.into_iter().map(|file| file.output));
148        }
149
150        self.run_standard_capped_with_info(candidates, matcher, output)
151    }
152
153    fn run_summary_with_info(
154        &self,
155        candidates: &[CandidateInfo],
156        matcher: &RegexMatcher,
157        output: SearchOutput,
158        parallel: bool,
159    ) -> crate::Result<bool> {
160        if parallel {
161            let stop = AtomicBool::new(false);
162            let mut files = candidates
163                .par_iter()
164                .enumerate()
165                .map_init(
166                    || {
167                        SummaryWorker::new(
168                            self,
169                            matcher.clone(),
170                            self.opts.max_results,
171                            output.mode,
172                        )
173                    },
174                    |worker: &mut SummaryWorker,
175                     (result_index, candidate): (usize, &CandidateInfo)| {
176                        worker.search_candidate(&candidate.abs_path, result_index, output, &stop)
177                    },
178                )
179                .collect::<Vec<_>>();
180            files.sort_by_key(|file| file.index);
181            return flush_chunk_output(files.into_iter().map(|file| file.output));
182        }
183
184        self.run_summary_capped_with_info(candidates, matcher, output)
185    }
186
187    fn run_standard_capped_with_info(
188        &self,
189        candidates: &[CandidateInfo],
190        matcher: &RegexMatcher,
191        output: SearchOutput,
192    ) -> crate::Result<bool> {
193        let mut any_match = false;
194        let mut out = Vec::new();
195        let mut searcher = self.build_searcher(output.line_number, self.opts.max_results);
196        for candidate in candidates {
197            let mut sink = StandardSink::new(matcher, output, &candidate.abs_path, &mut out);
198            let _ = searcher.search_path(matcher, &candidate.abs_path, &mut sink);
199            any_match |= sink.matched;
200            if output.emission == OutputEmission::Quiet && any_match {
201                break;
202            }
203        }
204
205        flush_chunk_output(std::iter::once(ChunkOutput {
206            bytes: out,
207            matched: any_match,
208        }))
209    }
210
211    fn run_summary_capped_with_info(
212        &self,
213        candidates: &[CandidateInfo],
214        matcher: &RegexMatcher,
215        output: SearchOutput,
216    ) -> crate::Result<bool> {
217        let mut any_match = false;
218        let mut out = Vec::new();
219        let mut worker =
220            SummaryWorker::new(self, matcher.clone(), self.opts.max_results, output.mode);
221        for candidate in candidates {
222            let result = worker.search_file(&candidate.abs_path);
223            any_match |= mode_is_success(output.mode, result);
224            write_summary_record(&mut out, output, &candidate.abs_path, result)?;
225            if output.emission == OutputEmission::Quiet && mode_is_success(output.mode, result) {
226                break;
227            }
228        }
229
230        flush_chunk_output(std::iter::once(ChunkOutput {
231            bytes: out,
232            matched: any_match,
233        }))
234    }
235
236    // Legacy methods kept for backward compat in tests
237
238    #[cfg(test)]
239    pub(crate) fn collect_index_matches(&self, index: &Index) -> crate::Result<Vec<Match>> {
240        let config = SearchFilterConfig {
241            scopes: vec![],
242            glob: GlobConfig::default(),
243            visibility: VisibilityConfig {
244                hidden: HiddenMode::Include,
245                ignore: IgnoreConfig::default(),
246            },
247        };
248        let filter = SearchFilter::new(&config, &index.root)?;
249        let candidate_ids = self.candidate_file_ids(index, false);
250        self.collect_index_candidates(index, &filter, &candidate_ids)
251    }
252
253    #[cfg(test)]
254    pub(crate) fn collect_walk_matches(&self, root: &Path) -> crate::Result<Vec<Match>> {
255        let root = root.canonicalize()?;
256        let mut candidates = Vec::new();
257        let walker = ignore::WalkBuilder::new(&root)
258            .follow_links(false)
259            .hidden(false)
260            .parents(false)
261            .ignore(false)
262            .git_global(false)
263            .git_ignore(false)
264            .git_exclude(false)
265            .require_git(false)
266            .build();
267        for entry in walker {
268            let entry = entry.map_err(crate::Error::Ignore)?;
269            if entry.file_type().is_some_and(|ft| ft.is_file()) {
270                let path = entry.path();
271                if path.components().any(|c| c.as_os_str() == ".sift") {
272                    continue;
273                }
274                candidates.push(path.to_path_buf());
275            }
276        }
277        self.collect_walk_candidates(&candidates)
278    }
279
280    #[cfg(test)]
281    fn collect_index_candidates(
282        &self,
283        index: &Index,
284        filter: &SearchFilter,
285        candidate_ids: &[usize],
286    ) -> crate::Result<Vec<Match>> {
287        let matcher = self.build_matcher()?;
288        let mut searcher = self.build_searcher(true, None);
289        let mut out = Vec::new();
290        for &id in candidate_ids {
291            let Some(candidate) = index.file_path(id) else {
292                continue;
293            };
294            if !filter.is_candidate(candidate) {
295                continue;
296            }
297            let mut sink = CollectSink::new(
298                index.root.join(candidate),
299                self.opts.only_matching(),
300                matcher.clone(),
301            );
302            let _ = searcher.search_path(&matcher, index.root.join(candidate), &mut sink);
303            out.extend(sink.into_matches());
304        }
305        Ok(out)
306    }
307
308    #[cfg(test)]
309    fn collect_walk_candidates(&self, candidates: &[PathBuf]) -> crate::Result<Vec<Match>> {
310        let matcher = self.build_matcher()?;
311        let mut searcher = self.build_searcher(true, None);
312        let mut out = Vec::new();
313        for candidate in candidates {
314            let mut sink = CollectSink::new(
315                candidate.clone(),
316                self.opts.only_matching(),
317                matcher.clone(),
318            );
319            let _ = searcher.search_path(&matcher, candidate, &mut sink);
320            out.extend(sink.into_matches());
321        }
322        Ok(out)
323    }
324}
325
/// Worker state for the parallel line-oriented search; one instance is created
/// per `map_init` initialisation and reused for the candidates it handles.
struct StandardWorker<'a> {
    /// Search whose `build_searcher` and `opts.max_results` configure each file search.
    search: &'a CompiledSearch,
    /// This worker's own matcher (cloned from the shared one by the caller).
    matcher: RegexMatcher,
    /// Output settings passed on to the sink.
    output: SearchOutput,
    /// Scratch buffer that receives the formatted output of the current file.
    bytes: Vec<u8>,
}
332
333impl<'a> StandardWorker<'a> {
334    const fn new(search: &'a CompiledSearch, matcher: RegexMatcher, output: SearchOutput) -> Self {
335        Self {
336            search,
337            matcher,
338            output,
339            bytes: Vec::new(),
340        }
341    }
342
343    fn search_candidate(
344        &mut self,
345        candidate: &CandidateInfo,
346        result_index: usize,
347        stop: &AtomicBool,
348    ) -> FileResult {
349        self.bytes.clear();
350        if stop.load(Ordering::SeqCst) {
351            return FileResult {
352                index: result_index,
353                output: ChunkOutput::empty(),
354            };
355        }
356
357        let matched = {
358            let mut searcher = self
359                .search
360                .build_searcher(self.output.line_number, self.search.opts.max_results);
361            let mut sink = StandardSink::new(
362                &self.matcher,
363                self.output,
364                &candidate.abs_path,
365                &mut self.bytes,
366            );
367            let _ = searcher.search_path(&self.matcher, &candidate.abs_path, &mut sink);
368            sink.matched
369        };
370
371        if self.output.emission == OutputEmission::Quiet && matched {
372            stop.store(true, Ordering::SeqCst);
373        }
374
375        // P0 fix: use mem::take instead of clone - avoids allocation when bytes is empty (quiet mode)
376        FileResult {
377            index: result_index,
378            output: ChunkOutput {
379                bytes: std::mem::take(&mut self.bytes),
380                matched,
381            },
382        }
383    }
384}
385
/// Sink for line-oriented modes: formats each matching line (or each match in
/// only-matching mode) of one file into a caller-owned byte buffer.
struct StandardSink<'a> {
    /// Matcher used to locate individual matches inside a line in only-matching mode.
    matcher: &'a RegexMatcher,
    /// Output settings (mode, emission, filename and line-number options).
    output: SearchOutput,
    /// Path written as the line prefix when filenames are printed.
    path: &'a Path,
    /// Destination buffer for the formatted output.
    bytes: &'a mut Vec<u8>,
    /// Set once the searcher reports at least one matching line.
    matched: bool,
    /// Number of matching lines reported so far.
    match_count: usize,
}
394
395impl<'a> StandardSink<'a> {
396    const fn new(
397        matcher: &'a RegexMatcher,
398        output: SearchOutput,
399        path: &'a Path,
400        bytes: &'a mut Vec<u8>,
401    ) -> Self {
402        Self {
403            matcher,
404            output,
405            path,
406            bytes,
407            matched: false,
408            match_count: 0,
409        }
410    }
411}
412
413impl Sink for StandardSink<'_> {
414    type Error = io::Error;
415
416    fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
417        self.matched = true;
418        self.match_count += 1;
419
420        if self.output.emission == OutputEmission::Quiet {
421            return Ok(true);
422        }
423
424        if matches!(self.output.mode, SearchMode::OnlyMatching) {
425            let line_number = mat.line_number();
426            let line = mat.bytes();
427            let _ = self.matcher.find_iter(line, |m: grep_matcher::Match| {
428                let _ = write_standard_prefix(self.bytes, self.output, self.path, line_number);
429                let _ = self.bytes.write_all(&line[m.start()..m.end()]);
430                let _ = self.bytes.write_all(b"\n");
431                true
432            });
433            return Ok(true);
434        }
435
436        write_standard_prefix(self.bytes, self.output, self.path, mat.line_number())?;
437        self.bytes.write_all(mat.bytes())?;
438        if !mat.bytes().ends_with(b"\n") {
439            self.bytes.write_all(b"\n")?;
440        }
441        Ok(true)
442    }
443}
444
/// Worker state for summary modes (counts and file lists): owns a matcher and a
/// searcher that are reused for every file it handles.
struct SummaryWorker {
    /// Matcher used for searching; cloned into the sink in `CountMatches` mode.
    matcher: RegexMatcher,
    /// Searcher built once in `new` and reused across files.
    searcher: Searcher,
    /// Summary mode that decides how results are counted.
    mode: SearchMode,
}
450
451impl SummaryWorker {
452    fn new(
453        search: &CompiledSearch,
454        matcher: RegexMatcher,
455        max_results: Option<usize>,
456        mode: SearchMode,
457    ) -> Self {
458        Self {
459            searcher: search.build_searcher(false, max_results),
460            matcher,
461            mode,
462        }
463    }
464
465    fn search_file(&mut self, path: &Path) -> FileSummary {
466        let sink_matcher = if self.mode == SearchMode::CountMatches {
467            Some(self.matcher.clone())
468        } else {
469            None
470        };
471        let mut sink = SummarySink::new(self.mode, sink_matcher);
472        let _ = self.searcher.search_path(&self.matcher, path, &mut sink);
473        sink.finish()
474    }
475
476    fn search_candidate(
477        &mut self,
478        path: &Path,
479        result_index: usize,
480        output: SearchOutput,
481        stop: &AtomicBool,
482    ) -> FileResult {
483        if stop.load(Ordering::SeqCst) {
484            return FileResult {
485                index: result_index,
486                output: ChunkOutput::empty(),
487            };
488        }
489
490        let result = self.search_file(path);
491        let matched = mode_is_success(output.mode, result);
492        let mut bytes = Vec::new();
493        let _ = write_summary_record(&mut bytes, output, path, result);
494        if output.emission == OutputEmission::Quiet && mode_is_success(output.mode, result) {
495            stop.store(true, Ordering::SeqCst);
496        }
497
498        FileResult {
499            index: result_index,
500            output: ChunkOutput { bytes, matched },
501        }
502    }
503}
504
/// Output of searching one candidate in a parallel run.
struct FileResult {
    /// Position of the candidate in the candidate slice; parallel runs sort by it
    /// before flushing.
    index: usize,
    /// Buffered output and match flag for the candidate.
    output: ChunkOutput,
}
509
/// A unit of buffered output handed to `flush_chunk_output`.
struct ChunkOutput {
    /// Bytes to write to stdout; may be empty.
    bytes: Vec<u8>,
    /// Whether this chunk counts as a match for the overall result.
    matched: bool,
}
514
515impl ChunkOutput {
516    const fn empty() -> Self {
517        Self {
518            bytes: Vec::new(),
519            matched: false,
520        }
521    }
522}
523
524fn flush_chunk_output(outputs: impl IntoIterator<Item = ChunkOutput>) -> crate::Result<bool> {
525    let mut stdout = io::stdout().lock();
526    let mut any_match = false;
527    for output in outputs {
528        any_match |= output.matched;
529        if output.bytes.is_empty() {
530            continue;
531        }
532        stdout.write_all(&output.bytes)?;
533    }
534    Ok(any_match)
535}
536
/// Per-file result of a summary-mode search.
#[derive(Clone, Copy)]
struct FileSummary {
    /// Whether the searcher reported at least one matching line.
    matched: bool,
    /// Matching lines counted, or individual matches in `CountMatches` mode.
    count: usize,
}
542
/// Sink for summary modes: records whether a file matched and how many
/// lines or matches were seen, without producing any output itself.
struct SummarySink {
    /// Summary mode that decides what is counted and when the search stops.
    mode: SearchMode,
    /// Matcher used to count matches within a line; supplied by `SummaryWorker`
    /// only in `CountMatches` mode.
    matcher: Option<RegexMatcher>,
    /// Set once the searcher reports at least one matching line.
    matched: bool,
    /// Running count of lines (or matches in `CountMatches` mode).
    count: usize,
}
549
550impl SummarySink {
551    const fn new(mode: SearchMode, matcher: Option<RegexMatcher>) -> Self {
552        Self {
553            mode,
554            matcher,
555            matched: false,
556            count: 0,
557        }
558    }
559
560    fn finish(self) -> FileSummary {
561        FileSummary {
562            matched: self.matched,
563            count: self.count,
564        }
565    }
566}
567
568impl Sink for SummarySink {
569    type Error = io::Error;
570
571    fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
572        self.matched = true;
573        if self.mode == SearchMode::CountMatches {
574            if let Some(ref matcher) = self.matcher {
575                let line = mat.bytes();
576                let mut n = 0;
577                let _ = matcher.find_iter(line, |_| {
578                    n += 1;
579                    true
580                });
581                self.count += n;
582            }
583        } else {
584            self.count += 1;
585        }
586        Ok(matches!(
587            self.mode,
588            SearchMode::Count | SearchMode::CountMatches
589        ))
590    }
591}
592
593fn write_summary_record(
594    out: &mut Vec<u8>,
595    output: SearchOutput,
596    path: &Path,
597    result: FileSummary,
598) -> io::Result<()> {
599    if output.emission == OutputEmission::Quiet {
600        return Ok(());
601    }
602    match output.mode {
603        SearchMode::Count | SearchMode::CountMatches => {
604            if result.count == 0 {
605                return Ok(());
606            }
607            let print_filename = output.filename_mode != FilenameMode::Never;
608            if print_filename {
609                writeln!(out, "{}:{}", path.display(), result.count)
610            } else {
611                writeln!(out, "{}", result.count)
612            }
613        }
614        SearchMode::FilesWithMatches => {
615            if result.matched {
616                writeln!(out, "{}", path.display())
617            } else {
618                Ok(())
619            }
620        }
621        SearchMode::FilesWithoutMatch => {
622            if result.matched {
623                Ok(())
624            } else {
625                writeln!(out, "{}", path.display())
626            }
627        }
628        SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
629    }
630}
631
632fn write_standard_prefix(
633    out: &mut Vec<u8>,
634    output: SearchOutput,
635    path: &Path,
636    line_number: Option<u64>,
637) -> io::Result<()> {
638    let print_filename = output.filename_mode != FilenameMode::Never;
639    if print_filename {
640        write!(out, "{}:", path.display())?;
641    }
642    if output.line_number {
643        write!(out, "{}:", line_number.unwrap_or(0))?;
644    }
645    Ok(())
646}
647
648#[allow(clippy::match_same_arms)]
649const fn mode_is_success(mode: SearchMode, result: FileSummary) -> bool {
650    match mode {
651        SearchMode::Count | SearchMode::CountMatches => result.count > 0,
652        SearchMode::FilesWithMatches => result.matched,
653        SearchMode::FilesWithoutMatch => !result.matched,
654        SearchMode::Standard | SearchMode::OnlyMatching => result.matched,
655    }
656}
657
658/// # Errors
659///
660/// Returns an error when canonicalizing `root` or while walking the tree.
661pub fn walk_file_paths(root: &Path) -> crate::Result<HashSet<PathBuf>> {
662    let root = root.canonicalize()?;
663    let mut set = HashSet::new();
664    let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
665    for entry in walker {
666        let entry = entry.map_err(crate::Error::Ignore)?;
667        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
668            continue;
669        }
670        let path = entry.path();
671        let display = path.strip_prefix(&root).unwrap_or(path).to_path_buf();
672        set.insert(display);
673    }
674    Ok(set)
675}
676
/// Returns the minimum number of candidates at which parallel search is used.
///
/// The effective thread count is the available parallelism, lowered to
/// `RAYON_NUM_THREADS` when that variable parses to a positive number. With a
/// single effective thread the threshold is `usize::MAX`; otherwise it is
/// eight times the effective thread count.
pub fn parallel_candidate_min_files() -> usize {
    let cpus = match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    };
    let requested = match std::env::var("RAYON_NUM_THREADS") {
        Ok(raw) => raw.parse::<usize>().ok().filter(|&n| n > 0),
        Err(_) => None,
    };
    let effective = match requested {
        Some(threads) => threads.min(cpus),
        None => cpus,
    }
    .max(1);

    if effective > 1 {
        effective.saturating_mul(8)
    } else {
        usize::MAX
    }
}
694
/// Test-only sink that collects matches of one file into a `Vec<Match>`
/// instead of formatting them for stdout.
#[cfg(test)]
struct CollectSink {
    /// Path stored in the `file` field of every collected match.
    path: PathBuf,
    /// When set, each match inside a line is collected instead of the whole line.
    only_matching: bool,
    /// Matcher used to locate individual matches in only-matching mode.
    matcher: RegexMatcher,
    /// Matches collected so far, in the order they were reported.
    matches: Vec<Match>,
}
702
#[cfg(test)]
impl CollectSink {
    /// Creates a sink for `path` with no matches collected yet.
    fn new(path: PathBuf, only_matching: bool, matcher: RegexMatcher) -> Self {
        let matches = Vec::new();
        Self {
            path,
            only_matching,
            matcher,
            matches,
        }
    }

    /// Consumes the sink and returns the collected matches.
    fn into_matches(self) -> Vec<Match> {
        let Self { matches, .. } = self;
        matches
    }
}
718
#[cfg(test)]
impl grep_searcher::Sink for CollectSink {
    type Error = io::Error;

    /// Collects one matching line, or each match inside it in only-matching mode.
    ///
    /// A missing line number, or one that does not fit in `usize`, is recorded
    /// as `0`. Text is converted lossily to UTF-8. Always continues searching.
    fn matched(
        &mut self,
        _: &grep_searcher::Searcher,
        mat: &grep_searcher::SinkMatch<'_>,
    ) -> Result<bool, Self::Error> {
        let line = mat
            .line_number()
            .and_then(|number| usize::try_from(number).ok())
            .unwrap_or(0);
        let haystack = mat.bytes();

        // Borrow the fields separately so the matcher stays usable below.
        let file = &self.path;
        let collected = &mut self.matches;
        let mut record = |text: &[u8]| {
            collected.push(Match {
                file: file.clone(),
                line,
                text: String::from_utf8_lossy(text).into_owned(),
            });
        };

        if self.only_matching {
            let _ = self
                .matcher
                .find_iter(haystack, |m: grep_matcher::Match| {
                    record(&haystack[m.start()..m.end()]);
                    true
                });
        } else {
            record(haystack);
        }
        Ok(true)
    }
}