Skip to main content

sift_core/search/
execute.rs

1use std::collections::HashSet;
2use std::io::{self, Write};
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, Ordering};
5
6use grep_matcher::Matcher;
7use grep_regex::RegexMatcher;
8use grep_searcher::{Searcher, Sink, SinkMatch};
9use rayon::prelude::*;
10
11use crate::planner::TrigramPlan;
12use crate::Index;
13
14use super::{CompiledSearch, SearchMode, SearchOutput};
15
16#[cfg(test)]
17use super::Match;
18
19impl CompiledSearch {
20    #[must_use]
21    pub fn candidate_file_ids(
22        &self,
23        index: &Index,
24        prefixes: &[PathBuf],
25        exhaustive: bool,
26    ) -> Vec<usize> {
27        let ids: Vec<usize> = if exhaustive {
28            (0..index.file_count()).collect()
29        } else {
30            match &self.plan {
31                TrigramPlan::FullScan => (0..index.file_count()).collect(),
32                TrigramPlan::Narrow { arms } => index
33                    .candidate_file_ids(arms.as_slice())
34                    .into_iter()
35                    .map(|id| id as usize)
36                    .collect(),
37            }
38        };
39
40        ids.into_iter()
41            .filter(|&id| {
42                index
43                    .file_path(id)
44                    .is_some_and(|rel| path_in_scope(rel, prefixes))
45            })
46            .collect()
47    }
48
49    /// Execute a search over an opened index and print results to stdout.
50    ///
51    /// # Errors
52    ///
53    /// Returns an error if the matcher cannot be built or stdout cannot be written.
54    pub fn run_index(
55        &self,
56        index: &Index,
57        prefixes: &[PathBuf],
58        output: SearchOutput,
59    ) -> crate::Result<bool> {
60        let candidate_ids = self.candidate_file_ids(
61            index,
62            prefixes,
63            Self::uses_exhaustive_candidates(output.mode),
64        );
65        if candidate_ids.is_empty() {
66            return Ok(false);
67        }
68
69        let matcher = self.build_matcher()?;
70        let parallel = self.opts.max_results.is_none()
71            && candidate_ids.len() >= parallel_candidate_min_files();
72        match output.mode {
73            SearchMode::Standard | SearchMode::OnlyMatching => {
74                self.run_standard(index, &candidate_ids, &matcher, output, parallel)
75            }
76            SearchMode::Count
77            | SearchMode::FilesWithMatches
78            | SearchMode::FilesWithoutMatch
79            | SearchMode::Quiet => {
80                self.run_summary(index, &candidate_ids, &matcher, output, parallel)
81            }
82        }
83    }
84
85    fn run_standard(
86        &self,
87        index: &Index,
88        candidate_ids: &[usize],
89        matcher: &RegexMatcher,
90        output: SearchOutput,
91        parallel: bool,
92    ) -> crate::Result<bool> {
93        if parallel {
94            let stop = AtomicBool::new(false);
95            let mut files = candidate_ids
96                .par_iter()
97                .enumerate()
98                .map_init(
99                    || StandardWorker::new(self, matcher.clone(), output),
100                    |worker, (result_index, &id)| {
101                        worker.search_candidate(index, result_index, id, &stop)
102                    },
103                )
104                .collect::<Vec<_>>();
105            files.sort_by_key(|file| file.index);
106            return flush_chunk_output(files.into_iter().map(|file| file.output));
107        }
108
109        self.run_standard_capped(index, candidate_ids, matcher, output)
110    }
111
112    fn run_summary(
113        &self,
114        index: &Index,
115        candidate_ids: &[usize],
116        matcher: &RegexMatcher,
117        output: SearchOutput,
118        parallel: bool,
119    ) -> crate::Result<bool> {
120        if parallel {
121            let stop = AtomicBool::new(false);
122            let mut files = candidate_ids
123                .par_iter()
124                .enumerate()
125                .map_init(
126                    || SummaryWorker::new(self, matcher.clone()),
127                    |worker, (result_index, &id)| {
128                        worker.search_candidate(index, result_index, id, output, &stop)
129                    },
130                )
131                .collect::<Vec<_>>();
132            files.sort_by_key(|file| file.index);
133            return flush_chunk_output(files.into_iter().map(|file| file.output));
134        }
135
136        self.run_summary_capped(index, candidate_ids, matcher, output)
137    }
138
139    fn run_standard_capped(
140        &self,
141        index: &Index,
142        candidate_ids: &[usize],
143        matcher: &RegexMatcher,
144        output: SearchOutput,
145    ) -> crate::Result<bool> {
146        let mut any_match = false;
147        let mut remaining = self.opts.max_results;
148        let mut out = Vec::new();
149        let mut actual = index.root.clone();
150        for &id in candidate_ids {
151            let mut searcher = self.build_searcher(output.line_number, remaining);
152            let Some(candidate) = index.file_path(id) else {
153                continue;
154            };
155            actual.push(candidate);
156            let depth = candidate.components().count();
157            let mut sink = StandardSink::new(matcher, output, &actual, &mut out);
158            let _ = searcher.search_path(matcher, &actual, &mut sink);
159            any_match |= sink.matched;
160            let used = sink.match_count;
161            for _ in 0..depth {
162                actual.pop();
163            }
164            if let Some(ref mut left) = remaining {
165                *left = left.saturating_sub(used);
166                if *left == 0 {
167                    break;
168                }
169            }
170        }
171
172        flush_chunk_output(std::iter::once(ChunkOutput {
173            bytes: out,
174            matched: any_match,
175        }))
176    }
177
178    fn run_summary_capped(
179        &self,
180        index: &Index,
181        candidate_ids: &[usize],
182        matcher: &RegexMatcher,
183        output: SearchOutput,
184    ) -> crate::Result<bool> {
185        let mut any_match = false;
186        let mut remaining = self.opts.max_results;
187        let mut out = Vec::new();
188        let mut worker = SummaryWorker::new(self, matcher.clone());
189        let mut actual = index.root.clone();
190        for &id in candidate_ids {
191            let Some(candidate) = index.file_path(id) else {
192                continue;
193            };
194            actual.push(candidate);
195            let depth = candidate.components().count();
196            let result = worker.search_file(&actual, output.mode);
197            any_match |= mode_is_success(output.mode, result);
198            write_summary_record(&mut out, output, &actual, result)?;
199            for _ in 0..depth {
200                actual.pop();
201            }
202            if let Some(ref mut left) = remaining {
203                *left = left.saturating_sub(usize::from(result.matched));
204                if *left == 0 {
205                    break;
206                }
207            }
208            if matches!(output.mode, SearchMode::Quiet) && result.matched {
209                break;
210            }
211        }
212
213        flush_chunk_output(std::iter::once(ChunkOutput {
214            bytes: out,
215            matched: any_match,
216        }))
217    }
218
219    #[cfg(test)]
220    pub(crate) fn collect_index_matches(&self, index: &Index) -> crate::Result<Vec<Match>> {
221        let candidate_ids = self.candidate_file_ids(index, &[], false);
222        self.collect_index_candidates(index, &candidate_ids)
223    }
224
225    #[cfg(test)]
226    pub(crate) fn collect_walk_matches(&self, root: &Path) -> crate::Result<Vec<Match>> {
227        let root = root.canonicalize()?;
228        let mut candidates = Vec::new();
229        let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
230        for entry in walker {
231            let entry = entry.map_err(crate::Error::Ignore)?;
232            if entry.file_type().is_some_and(|ft| ft.is_file()) {
233                candidates.push(entry.path().to_path_buf());
234            }
235        }
236        self.collect_walk_candidates(&candidates)
237    }
238
239    #[cfg(test)]
240    fn collect_index_candidates(
241        &self,
242        index: &Index,
243        candidate_ids: &[usize],
244    ) -> crate::Result<Vec<Match>> {
245        let matcher = self.build_matcher()?;
246        let mut searcher = self.build_searcher(true, None);
247        let mut out = Vec::new();
248        let mut actual = index.root.clone();
249        for &id in candidate_ids {
250            let Some(candidate) = index.file_path(id) else {
251                continue;
252            };
253            actual.push(candidate);
254            let depth = candidate.components().count();
255            let mut sink =
256                CollectSink::new(actual.clone(), self.opts.only_matching(), matcher.clone());
257            let _ = searcher.search_path(&matcher, &actual, &mut sink);
258            for _ in 0..depth {
259                actual.pop();
260            }
261            out.extend(sink.into_matches());
262        }
263        Ok(out)
264    }
265
266    #[cfg(test)]
267    fn collect_walk_candidates(&self, candidates: &[PathBuf]) -> crate::Result<Vec<Match>> {
268        let matcher = self.build_matcher()?;
269        let mut searcher = self.build_searcher(true, None);
270        let mut out = Vec::new();
271        for candidate in candidates {
272            let mut sink = CollectSink::new(
273                candidate.clone(),
274                self.opts.only_matching(),
275                matcher.clone(),
276            );
277            let _ = searcher.search_path(&matcher, candidate, &mut sink);
278            out.extend(sink.into_matches());
279        }
280        Ok(out)
281    }
282}
283
284struct StandardWorker {
285    matcher: RegexMatcher,
286    searcher: Searcher,
287    output: SearchOutput,
288    bytes: Vec<u8>,
289}
290
291impl StandardWorker {
292    fn new(search: &CompiledSearch, matcher: RegexMatcher, output: SearchOutput) -> Self {
293        Self {
294            searcher: search.build_searcher(output.line_number, None),
295            matcher,
296            output,
297            bytes: Vec::new(),
298        }
299    }
300
301    fn search_candidate(
302        &mut self,
303        index: &Index,
304        result_index: usize,
305        id: usize,
306        stop: &AtomicBool,
307    ) -> FileResult {
308        self.bytes.clear();
309        if stop.load(Ordering::SeqCst) {
310            return FileResult {
311                index: result_index,
312                output: ChunkOutput {
313                    bytes: Vec::new(),
314                    matched: false,
315                },
316            };
317        }
318
319        let Some(candidate) = index.file_path(id) else {
320            return FileResult {
321                index: result_index,
322                output: ChunkOutput {
323                    bytes: Vec::new(),
324                    matched: false,
325                },
326            };
327        };
328        let actual = index.root.join(candidate);
329        let matched = {
330            let mut sink = StandardSink::new(&self.matcher, self.output, &actual, &mut self.bytes);
331            let _ = self.searcher.search_path(&self.matcher, &actual, &mut sink);
332            sink.matched
333        };
334
335        FileResult {
336            index: result_index,
337            output: ChunkOutput {
338                bytes: self.bytes.clone(),
339                matched,
340            },
341        }
342    }
343}
344
345struct StandardSink<'a> {
346    matcher: &'a RegexMatcher,
347    output: SearchOutput,
348    path: &'a Path,
349    bytes: &'a mut Vec<u8>,
350    matched: bool,
351    match_count: usize,
352}
353
354impl<'a> StandardSink<'a> {
355    const fn new(
356        matcher: &'a RegexMatcher,
357        output: SearchOutput,
358        path: &'a Path,
359        bytes: &'a mut Vec<u8>,
360    ) -> Self {
361        Self {
362            matcher,
363            output,
364            path,
365            bytes,
366            matched: false,
367            match_count: 0,
368        }
369    }
370}
371
372impl Sink for StandardSink<'_> {
373    type Error = io::Error;
374
375    fn matched(&mut self, _: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, Self::Error> {
376        self.matched = true;
377        self.match_count += 1;
378
379        if matches!(self.output.mode, SearchMode::OnlyMatching) {
380            let line_number = mat.line_number();
381            let line = mat.bytes();
382            let _ = self.matcher.find_iter(line, |m: grep_matcher::Match| {
383                let _ = write_standard_prefix(self.bytes, self.output, self.path, line_number);
384                let _ = self.bytes.write_all(&line[m.start()..m.end()]);
385                let _ = self.bytes.write_all(b"\n");
386                true
387            });
388            return Ok(true);
389        }
390
391        write_standard_prefix(self.bytes, self.output, self.path, mat.line_number())?;
392        self.bytes.write_all(mat.bytes())?;
393        if !mat.bytes().ends_with(b"\n") {
394            self.bytes.write_all(b"\n")?;
395        }
396        Ok(true)
397    }
398}
399
400struct SummaryWorker {
401    matcher: RegexMatcher,
402    searcher: Searcher,
403}
404
405impl SummaryWorker {
406    fn new(search: &CompiledSearch, matcher: RegexMatcher) -> Self {
407        Self {
408            searcher: search.build_searcher(false, None),
409            matcher,
410        }
411    }
412
413    fn search_file(&mut self, path: &Path, mode: SearchMode) -> FileSummary {
414        let mut sink = SummarySink::new(mode);
415        let _ = self.searcher.search_path(&self.matcher, path, &mut sink);
416        sink.finish()
417    }
418
419    fn search_candidate(
420        &mut self,
421        index: &Index,
422        result_index: usize,
423        id: usize,
424        output: SearchOutput,
425        stop: &AtomicBool,
426    ) -> FileResult {
427        if stop.load(Ordering::SeqCst) {
428            return FileResult {
429                index: result_index,
430                output: ChunkOutput {
431                    bytes: Vec::new(),
432                    matched: false,
433                },
434            };
435        }
436
437        let Some(candidate) = index.file_path(id) else {
438            return FileResult {
439                index: result_index,
440                output: ChunkOutput {
441                    bytes: Vec::new(),
442                    matched: false,
443                },
444            };
445        };
446
447        let actual = index.root.join(candidate);
448        let result = self.search_file(&actual, output.mode);
449        let matched = mode_is_success(output.mode, result);
450        let mut bytes = Vec::new();
451        let _ = write_summary_record(&mut bytes, output, &actual, result);
452        if matches!(output.mode, SearchMode::Quiet) && result.matched {
453            stop.store(true, Ordering::SeqCst);
454        }
455
456        FileResult {
457            index: result_index,
458            output: ChunkOutput { bytes, matched },
459        }
460    }
461}
462
463struct FileResult {
464    index: usize,
465    output: ChunkOutput,
466}
467
468struct ChunkOutput {
469    bytes: Vec<u8>,
470    matched: bool,
471}
472
473fn flush_chunk_output(outputs: impl IntoIterator<Item = ChunkOutput>) -> crate::Result<bool> {
474    let mut stdout = io::stdout().lock();
475    let mut any_match = false;
476    for output in outputs {
477        any_match |= output.matched;
478        if output.bytes.is_empty() {
479            continue;
480        }
481        stdout.write_all(&output.bytes)?;
482    }
483    Ok(any_match)
484}
485
486#[derive(Clone, Copy)]
487struct FileSummary {
488    matched: bool,
489    count: usize,
490}
491
492struct SummarySink {
493    mode: SearchMode,
494    matched: bool,
495    count: usize,
496}
497
498impl SummarySink {
499    const fn new(mode: SearchMode) -> Self {
500        Self {
501            mode,
502            matched: false,
503            count: 0,
504        }
505    }
506
507    const fn finish(self) -> FileSummary {
508        FileSummary {
509            matched: self.matched,
510            count: self.count,
511        }
512    }
513}
514
515impl Sink for SummarySink {
516    type Error = io::Error;
517
518    fn matched(&mut self, _: &Searcher, _: &SinkMatch<'_>) -> Result<bool, Self::Error> {
519        self.matched = true;
520        self.count += 1;
521        Ok(matches!(self.mode, SearchMode::Count))
522    }
523}
524
525fn write_summary_record(
526    out: &mut Vec<u8>,
527    output: SearchOutput,
528    path: &Path,
529    result: FileSummary,
530) -> io::Result<()> {
531    match output.mode {
532        SearchMode::Count => {
533            if output.with_filename {
534                writeln!(out, "{}:{}", path.display(), result.count)
535            } else {
536                writeln!(out, "{}", result.count)
537            }
538        }
539        SearchMode::FilesWithMatches => {
540            if result.matched && output.with_filename {
541                writeln!(out, "{}", path.display())
542            } else {
543                Ok(())
544            }
545        }
546        SearchMode::FilesWithoutMatch => {
547            if !result.matched && output.with_filename {
548                writeln!(out, "{}", path.display())
549            } else {
550                Ok(())
551            }
552        }
553        SearchMode::Quiet => Ok(()),
554        SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
555    }
556}
557
558fn write_standard_prefix(
559    out: &mut Vec<u8>,
560    output: SearchOutput,
561    path: &Path,
562    line_number: Option<u64>,
563) -> io::Result<()> {
564    if output.with_filename {
565        write!(out, "{}:", path.display())?;
566    }
567    if output.line_number {
568        write!(out, "{}:", line_number.unwrap_or(0))?;
569    }
570    Ok(())
571}
572
573const fn mode_is_success(mode: SearchMode, result: FileSummary) -> bool {
574    match mode {
575        SearchMode::Count | SearchMode::FilesWithMatches | SearchMode::Quiet => result.matched,
576        SearchMode::FilesWithoutMatch => !result.matched,
577        SearchMode::Standard | SearchMode::OnlyMatching => unreachable!(),
578    }
579}
580
581fn path_in_scope(rel: &Path, prefixes: &[PathBuf]) -> bool {
582    if prefixes.is_empty() {
583        return true;
584    }
585    prefixes
586        .iter()
587        .any(|pre| rel.starts_with(pre) || rel.as_os_str() == pre.as_os_str())
588}
589
590/// Walk a directory tree and return all indexed file paths relative to `root`.
591///
592/// # Errors
593///
594/// Returns an error when canonicalizing `root` or while walking the tree.
595pub fn walk_file_paths(root: &Path) -> crate::Result<HashSet<PathBuf>> {
596    let root = root.canonicalize()?;
597    let mut set = HashSet::new();
598    let walker = ignore::WalkBuilder::new(&root).follow_links(false).build();
599    for entry in walker {
600        let entry = entry.map_err(crate::Error::Ignore)?;
601        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
602            continue;
603        }
604        let path = entry.path();
605        let display = path.strip_prefix(&root).unwrap_or(path).to_path_buf();
606        set.insert(display);
607    }
608    Ok(set)
609}
610
611pub fn parallel_candidate_min_files() -> usize {
612    let cpus = std::thread::available_parallelism()
613        .map(std::num::NonZeroUsize::get)
614        .unwrap_or(1);
615    let rayon_threads = std::env::var("RAYON_NUM_THREADS")
616        .ok()
617        .and_then(|s| s.parse::<usize>().ok());
618    let effective = rayon_threads
619        .filter(|&n| n > 0)
620        .map_or(cpus, |rt| rt.min(cpus))
621        .max(1);
622    if effective <= 1 {
623        usize::MAX
624    } else {
625        effective.saturating_mul(8)
626    }
627}
628
629#[cfg(test)]
630struct CollectSink {
631    path: PathBuf,
632    only_matching: bool,
633    matcher: RegexMatcher,
634    matches: Vec<Match>,
635}
636
637#[cfg(test)]
638impl CollectSink {
639    fn new(path: PathBuf, only_matching: bool, matcher: RegexMatcher) -> Self {
640        Self {
641            path,
642            only_matching,
643            matcher,
644            matches: Vec::new(),
645        }
646    }
647
648    fn into_matches(self) -> Vec<Match> {
649        self.matches
650    }
651}
652
653#[cfg(test)]
654impl grep_searcher::Sink for CollectSink {
655    type Error = io::Error;
656
657    fn matched(
658        &mut self,
659        _: &grep_searcher::Searcher,
660        mat: &grep_searcher::SinkMatch<'_>,
661    ) -> Result<bool, Self::Error> {
662        let line = usize::try_from(mat.line_number().unwrap_or(0)).unwrap_or(0);
663        let line_bytes = mat.bytes();
664        if self.only_matching {
665            let _ = self
666                .matcher
667                .find_iter(line_bytes, |m: grep_matcher::Match| {
668                    self.matches.push(Match {
669                        file: self.path.clone(),
670                        line,
671                        text: String::from_utf8_lossy(&line_bytes[m.start()..m.end()]).into_owned(),
672                    });
673                    true
674                });
675        } else {
676            self.matches.push(Match {
677                file: self.path.clone(),
678                line,
679                text: String::from_utf8_lossy(line_bytes).into_owned(),
680            });
681        }
682        Ok(true)
683    }
684}