Skip to main content

rgx/
confirm.rs

//! The confirm step: run ripgrep's own engine over a set of files and emit `path:line:text`.
//!
//! This is where correctness lives — it is literally ripgrep's matcher, searcher, and printer, so
//! output is byte-for-byte `rg`'s. We deliberately use `BinaryDetection::quit`, which reproduces
//! ripgrep's *recursive-traversal* binary behavior (search until the first NUL, then stop) — not
//! the explicit-file-argument behavior the `rg` binary would apply to a candidate list. See
//! `docs/index-and-storage.md` sections 3.1 and 4.
8
9use std::path::Path;
10
11use anyhow::Result;
12use grep::printer::StandardBuilder;
13use grep::regex::{RegexMatcher, RegexMatcherBuilder};
14use grep::searcher::{BinaryDetection, Searcher, SearcherBuilder};
15use ignore::WalkState;
16use rayon::prelude::*;
17use termcolor::NoColor;
18
/// The ripgrep-style flags that rgx currently honors for a single search.
///
/// The value is small and `Copy`, so it is passed by value. It travels over the daemon protocol
/// and drives both query extraction and the confirm step. `Default` yields every flag off and
/// zero lines of context.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct SearchOptions {
    /// Forwarded to the matcher's `case_insensitive` setting.
    pub case_insensitive: bool,
    /// Forwarded to the `multi_line` setting of both the matcher and the searcher.
    pub multi_line: bool,
    /// Forwarded to the matcher's `dot_matches_new_line` setting.
    pub dot_matches_new_line: bool,
    /// `-w`: match only at word boundaries.
    pub word: bool,
    /// `-F`: treat the pattern as a literal string.
    pub fixed_strings: bool,
    /// `-B` / `-C`: number of context lines printed before each match.
    pub before_context: usize,
    /// `-A` / `-C`: number of context lines printed after each match.
    pub after_context: usize,
}
35
/// Upper bound on how many files are searched in one parallel pass. Output is handed to the caller
/// after every pass, so at most this many files' rendered results are buffered at a time and large
/// result sets stream out instead of accumulating for the whole corpus.
const BATCH: usize = 512;
39
40pub(crate) fn build_matcher(pattern: &str, opts: SearchOptions) -> Result<RegexMatcher> {
41    Ok(RegexMatcherBuilder::new()
42        .case_insensitive(opts.case_insensitive)
43        .multi_line(opts.multi_line)
44        .dot_matches_new_line(opts.dot_matches_new_line)
45        .word(opts.word)
46        .build(pattern)?)
47}
48
49fn build_searcher(opts: SearchOptions) -> Searcher {
50    SearcherBuilder::new()
51        .line_number(true)
52        .binary_detection(BinaryDetection::quit(0))
53        .multi_line(opts.multi_line)
54        .before_context(opts.before_context)
55        .after_context(opts.after_context)
56        .build()
57}
58
/// The path to print for `path`.
///
/// When `path` lies strictly below `root`, the result is `path` with the `root` prefix removed, so
/// output matches `rg`'s cwd-relative paths and cursors stay small. Otherwise `path` is returned
/// unchanged. This function only chooses the label; the file is still read from the real `path`.
///
/// `path == root` (for example when the root is itself a single file) is treated as "not below
/// root": stripping the prefix would leave an empty path, and matches would then be rendered
/// without any file name.
fn display_path<'a>(path: &'a Path, root: &Path) -> &'a Path {
    match path.strip_prefix(root) {
        // Only use the relative form when something is left to show.
        Ok(relative) if !relative.as_os_str().is_empty() => relative,
        _ => path,
    }
}
65
66/// Render one file's matches into `buf` (cleared first), exactly as `rg` would print them. The file is
67/// read from `path` but printed relative to `root`.
68fn search_one(
69    searcher: &mut Searcher,
70    matcher: &RegexMatcher,
71    path: &Path,
72    root: &Path,
73    buf: &mut Vec<u8>,
74) {
75    buf.clear();
76    let mut printer = StandardBuilder::new().build(NoColor::new(&mut *buf));
77    let shown = display_path(path, root);
78    let _ = searcher.search_path(matcher, path, printer.sink_with_path(matcher, shown));
79}
80
81/// Search a known `paths` set for `pattern` (already made effective — escaped for `-F` by the
82/// caller), emitting each file's rendered output via `emit`, in the order the paths are given
83/// (callers pass them sorted, so output is deterministic). Paths are printed relative to `root`.
84/// Memory stays bounded to one batch.
85pub fn search_streaming(
86    pattern: &str,
87    paths: &[&Path],
88    root: &Path,
89    opts: SearchOptions,
90    mut emit: impl FnMut(&[u8]) -> Result<()>,
91) -> Result<()> {
92    let matcher = build_matcher(pattern, opts)?;
93    for batch in paths.chunks(BATCH) {
94        let chunks: Vec<Vec<u8>> = batch
95            .par_iter()
96            .map_init(
97                || (build_searcher(opts), Vec::new()),
98                |(searcher, buf), path| {
99                    search_one(searcher, &matcher, path, root, buf);
100                    std::mem::take(buf)
101                },
102            )
103            .collect();
104        for c in &chunks {
105            emit(c)?;
106        }
107    }
108    Ok(())
109}
110
111/// Pipelined full-tree scan, matching ripgrep's own model: a parallel `ignore` walk feeds per-file
112/// search, and each thread streams its output through `sink` as files are discovered — no upfront
113/// walk-then-search split, no sort. Output order is therefore nondeterministic (like `rg` without
114/// `--sort`). Used for fallback queries (no usable trigram) and the daemon's cold start, entirely
115/// in-process — ripgrep's engine is linked in, so no `rg` binary is ever required.
116pub fn full_scan(
117    root: &Path,
118    pattern: &str,
119    opts: SearchOptions,
120    sink: impl Fn(&[u8]) + Sync,
121) -> Result<()> {
122    let matcher = build_matcher(pattern, opts)?;
123    let matcher = &matcher;
124    let sink = &sink;
125    crate::index::walk_builder(root).build_parallel().run(|| {
126        // Build the searcher and printer once per walk thread (not per file): for a match-everything
127        // query over tens of thousands of files, per-file printer construction dominates otherwise.
128        let mut searcher = build_searcher(opts);
129        let mut printer = StandardBuilder::new().build(NoColor::new(Vec::<u8>::new()));
130        Box::new(move |res| {
131            if let Ok(entry) = res
132                && entry.file_type().is_some_and(|t| t.is_file())
133            {
134                let path = entry.path();
135                let shown = display_path(path, root);
136                let _ = searcher.search_path(matcher, path, printer.sink_with_path(matcher, shown));
137                let buf = printer.get_mut().get_mut();
138                if !buf.is_empty() {
139                    sink(buf);
140                    buf.clear();
141                }
142            }
143            WalkState::Continue
144        })
145    });
146    Ok(())
147}
148
149/// Collecting convenience over [`search_streaming`] (used by tests and small in-process callers).
150pub fn search(pattern: &str, paths: &[&Path], root: &Path, opts: SearchOptions) -> Result<Vec<u8>> {
151    let mut out = Vec::new();
152    search_streaming(pattern, paths, root, opts, |c| {
153        out.extend_from_slice(c);
154        Ok(())
155    })?;
156    Ok(out)
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// A single matching line is rendered as `relative-path:line:text`; non-matching lines and
    /// the absolute root directory do not appear in the output.
    #[test]
    fn emits_path_line_text() {
        // Per-process scratch directory; clear any leftover from an earlier run first.
        let dir = std::env::temp_dir().join(format!("rgx_confirm_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();

        let file = dir.join("f.txt");
        std::fs::write(&file, b"alpha\nbeta NEEDLE gamma\ndelta\n").unwrap();

        let candidates = [file.as_path()];
        let rendered = search("NEEDLE", &candidates, &dir, SearchOptions::default()).unwrap();
        let text = String::from_utf8(rendered).unwrap();

        assert!(
            text.starts_with("f.txt:2:beta NEEDLE gamma"),
            "got: {text:?}"
        );
        assert!(!text.contains("alpha"));
        assert!(
            !text.contains(dir.to_str().unwrap()),
            "path should be relative: {text:?}"
        );

        let _ = std::fs::remove_dir_all(&dir);
    }
}