Skip to main content

harness_grep/
engine.rs

1use crate::types::{RgCount, RgMatch};
2use anyhow::{anyhow, Result};
3use grep_regex::RegexMatcherBuilder;
4use grep_searcher::{BinaryDetection, Searcher, SearcherBuilder, Sink, SinkMatch};
5use ignore::{WalkBuilder, WalkState};
6use std::path::{Path, PathBuf};
7use std::sync::mpsc::{self, Receiver, Sender};
8use std::sync::{Arc, Mutex};
9
/// Everything the engine needs to run one search or count.
///
/// Shape-compatible with the TS `GrepEngineInput`. The one divergence is
/// `signal`: Rust tool calls don't take an explicit cancel signal at this
/// layer (yet), so the engine always runs to natural completion.
#[derive(Debug, Clone)]
pub struct GrepEngineInput {
    /// Regular expression source handed to the regex matcher builder.
    pub pattern: String,
    /// Path the file walk starts from.
    pub root: PathBuf,
    /// Optional override glob, passed through as-is (ripgrep `-g` style).
    pub glob: Option<String>,
    /// Optional file-type name selected from the default type table.
    pub r#type: Option<String>,
    /// Match without regard to letter case.
    pub case_insensitive: bool,
    /// Enable multi-line matching (also lets `.` match a newline).
    pub multiline: bool,
    /// Number of context lines reported before each match.
    pub context_before: usize,
    /// Number of context lines reported after each match.
    pub context_after: usize,
    /// Longest line text returned before it is truncated.
    pub max_columns: usize,
    /// Size cap handed to the file walker's `max_filesize` setting.
    pub max_filesize: u64,
}
27
28/// Same pluggable-backend idea as the TS `GrepEngine`: default wraps
29/// ripgrep's library, tests or SSH-remote harnesses can substitute.
30pub trait GrepEngine: Send + Sync {
31    fn search(&self, input: &GrepEngineInput) -> Result<Vec<RgMatch>>;
32    fn count(&self, input: &GrepEngineInput) -> Result<Vec<RgCount>>;
33}
34
35pub fn default_engine() -> Box<dyn GrepEngine> {
36    Box::new(RipgrepLibEngine::new())
37}
38
/// Default engine, built directly on BurntSushi/ripgrep's library crates.
///
/// It deliberately mirrors the CLI invariants the TS version passes to
/// the `ripgrep` npm wrapper: hidden entries are skipped, symbolic links
/// are not followed during traversal, `.gitignore` / `.ignore` /
/// `.rgignore` rules are respected, and the `max_filesize` and
/// `max_columns` caps apply.
///
/// Division of labour: the `ignore` crate discovers files,
/// `grep-searcher` iterates lines, and `grep-regex` supplies the matcher.
pub struct RipgrepLibEngine;
47
48impl Default for RipgrepLibEngine {
49    fn default() -> Self {
50        Self::new()
51    }
52}
53
54impl RipgrepLibEngine {
55    pub fn new() -> Self {
56        Self
57    }
58
59    fn build_matcher(
60        &self,
61        input: &GrepEngineInput,
62    ) -> Result<grep_regex::RegexMatcher> {
63        RegexMatcherBuilder::new()
64            .case_insensitive(input.case_insensitive)
65            .multi_line(input.multiline)
66            .dot_matches_new_line(input.multiline)
67            .build(&input.pattern)
68            .map_err(|e| anyhow!(e.to_string()))
69    }
70
71    fn build_walk(&self, input: &GrepEngineInput) -> WalkBuilder {
72        let mut wb = WalkBuilder::new(&input.root);
73        wb.hidden(true) // skip hidden by default
74            .git_ignore(true)
75            .git_global(true)
76            .git_exclude(true)
77            .ignore(true)
78            .parents(true)
79            .follow_links(false)
80            .max_filesize(Some(input.max_filesize))
81            .require_git(false)
82            .add_custom_ignore_filename(".rgignore");
83
84        if let Some(g) = input.glob.as_deref() {
85            let mut b = ignore::overrides::OverrideBuilder::new(&input.root);
86            // Leading `!` inverts an `.ignore`-style entry; we pass glob
87            // as-is, matching the ripgrep `-g` semantics.
88            let _ = b.add(g);
89            if let Ok(over) = b.build() {
90                wb.overrides(over);
91            }
92        }
93        if let Some(t) = input.r#type.as_deref() {
94            let mut tb = ignore::types::TypesBuilder::new();
95            tb.add_defaults();
96            let _ = tb.select(t);
97            if let Ok(types) = tb.build() {
98                wb.types(types);
99            }
100        }
101        wb
102    }
103
104    fn make_searcher(&self, input: &GrepEngineInput) -> Searcher {
105        let mut sb = SearcherBuilder::new();
106        sb.binary_detection(BinaryDetection::quit(b'\x00'))
107            .multi_line(input.multiline);
108        if input.context_before > 0 {
109            sb.before_context(input.context_before);
110        }
111        if input.context_after > 0 {
112            sb.after_context(input.context_after);
113        }
114        sb.build()
115    }
116}
117
118impl GrepEngine for RipgrepLibEngine {
119    fn search(&self, input: &GrepEngineInput) -> Result<Vec<RgMatch>> {
120        let matcher = self.build_matcher(input)?;
121        let walker = self.build_walk(input).build_parallel();
122        let (tx, rx): (Sender<RgMatch>, Receiver<RgMatch>) = mpsc::channel();
123        let max_cols = input.max_columns;
124
125        let before_ctx = input.context_before;
126        let after_ctx = input.context_after;
127        let multi = input.multiline;
128        walker.run(|| {
129            let matcher = matcher.clone();
130            let tx = tx.clone();
131            Box::new(move |result| {
132                let entry = match result {
133                    Ok(e) => e,
134                    Err(_) => return WalkState::Continue,
135                };
136                let p = entry.path();
137                if !p.is_file() {
138                    return WalkState::Continue;
139                }
140                // Build a per-file searcher that honors context + multiline.
141                // Doing it inside the closure keeps thread-safety simple —
142                // Searcher is not Send, so we can't hoist it.
143                let mut sb = SearcherBuilder::new();
144                sb.binary_detection(BinaryDetection::quit(b'\x00'))
145                    .multi_line(multi);
146                if before_ctx > 0 {
147                    sb.before_context(before_ctx);
148                }
149                if after_ctx > 0 {
150                    sb.after_context(after_ctx);
151                }
152                let mut searcher = sb.build();
153                let mut sink = VecSink {
154                    path: p.to_string_lossy().into_owned(),
155                    matches: Vec::new(),
156                    max_cols,
157                };
158                let _ = searcher.search_path(&matcher, p, &mut sink);
159                for m in sink.matches {
160                    let _ = tx.send(m);
161                }
162                WalkState::Continue
163            })
164        });
165        drop(tx);
166        Ok(rx.into_iter().collect())
167    }
168
169    fn count(&self, input: &GrepEngineInput) -> Result<Vec<RgCount>> {
170        let matcher = self.build_matcher(input)?;
171        let walker = self.build_walk(input).build_parallel();
172        let counts: Arc<Mutex<Vec<RgCount>>> = Arc::new(Mutex::new(Vec::new()));
173        let max_cols = input.max_columns;
174
175        walker.run(|| {
176            let matcher = matcher.clone();
177            let counts = Arc::clone(&counts);
178            Box::new(move |result| {
179                let entry = match result {
180                    Ok(e) => e,
181                    Err(_) => return WalkState::Continue,
182                };
183                let p = entry.path();
184                if !p.is_file() {
185                    return WalkState::Continue;
186                }
187                let mut searcher = SearcherBuilder::new()
188                    .binary_detection(BinaryDetection::quit(b'\x00'))
189                    .build();
190                let mut sink = CountSink {
191                    count: 0,
192                    max_cols,
193                };
194                let _ = searcher.search_path(&matcher, p, &mut sink);
195                if sink.count > 0 {
196                    let mut g = counts.lock().unwrap();
197                    g.push(RgCount {
198                        path: p.to_string_lossy().into_owned(),
199                        count: sink.count,
200                    });
201                }
202                WalkState::Continue
203            })
204        });
205        let mut out = Arc::try_unwrap(counts).unwrap().into_inner().unwrap();
206        out.sort_by(|a, b| a.path.cmp(&b.path));
207        Ok(out)
208    }
209}
210
211// ---- grep-searcher sinks ----
212
213struct VecSink {
214    path: String,
215    matches: Vec<RgMatch>,
216    max_cols: usize,
217}
218
219impl Sink for VecSink {
220    type Error = std::io::Error;
221
222    fn matched(
223        &mut self,
224        _searcher: &Searcher,
225        mat: &SinkMatch<'_>,
226    ) -> Result<bool, Self::Error> {
227        let text = decode_line(mat.bytes(), self.max_cols);
228        let line_number = mat.line_number().unwrap_or(0);
229        self.matches.push(RgMatch {
230            path: self.path.clone(),
231            line_number,
232            text,
233            is_context: false,
234        });
235        Ok(true)
236    }
237
238    fn context(
239        &mut self,
240        _searcher: &Searcher,
241        ctx: &grep_searcher::SinkContext<'_>,
242    ) -> Result<bool, Self::Error> {
243        let text = decode_line(ctx.bytes(), self.max_cols);
244        let line_number = ctx.line_number().unwrap_or(0);
245        self.matches.push(RgMatch {
246            path: self.path.clone(),
247            line_number,
248            text,
249            is_context: true,
250        });
251        Ok(true)
252    }
253}
254
/// Sink that only tallies how many matches the searcher reports.
struct CountSink {
    /// Number of matches seen so far.
    count: u64,
    /// Carried for parity with `VecSink`; counting never reads line text.
    max_cols: usize,
}
259
260impl Sink for CountSink {
261    type Error = std::io::Error;
262
263    fn matched(
264        &mut self,
265        _searcher: &Searcher,
266        _mat: &SinkMatch<'_>,
267    ) -> Result<bool, Self::Error> {
268        self.count += 1;
269        let _ = self.max_cols; // unused; silence warning
270        Ok(true)
271    }
272}
273
/// Converts one raw line from the searcher into display text.
///
/// Invalid UTF-8 is replaced lossily and trailing `\n` / `\r` characters
/// are stripped. When the remaining text is longer than `max_cols` bytes
/// it is cut and a truncation notice is appended. The cut snaps back to
/// the nearest character boundary, so a multi-byte character straddling
/// the limit is dropped whole instead of causing a slicing panic.
///
/// The notice says "chars" for compatibility with existing output even
/// though the limit is measured in bytes.
fn decode_line(bytes: &[u8], max_cols: usize) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    let line = decoded.trim_end_matches(&['\n', '\r'][..]);
    if line.len() <= max_cols {
        return line.to_string();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_cols)
        .rev()
        .find(|&i| line.is_char_boundary(i))
        .unwrap_or(0);
    format!(
        "{}... (line truncated to {} chars)",
        &line[..cut],
        max_cols
    )
}
287
288/// Detect whether a pattern compiles. Used so the tool can return
289/// `INVALID_REGEX` with the upstream error BEFORE attempting a full
290/// walk.
291pub fn compile_probe(pattern: &str) -> Result<(), String> {
292    match RegexMatcherBuilder::new().build(pattern) {
293        Ok(_) => Ok(()),
294        Err(e) => Err(e.to_string()),
295    }
296}
297
/// Reorders `paths` in place by modification time, newest first. Used by
/// `files_with_matches` and `content` modes.
///
/// Paths whose mtime cannot be read sort after all paths that have one.
/// Ties, and paths without an mtime, fall back to path order.
pub fn sort_paths_by_mtime(paths: &mut Vec<String>) {
    // Stat each path exactly once, up front, rather than inside the comparator.
    let mut stamped: Vec<(Option<std::time::SystemTime>, String)> = Vec::with_capacity(paths.len());
    for path in paths.drain(..) {
        let modified = std::fs::metadata(&path)
            .ok()
            .and_then(|meta| meta.modified().ok());
        stamped.push((modified, path));
    }

    stamped.sort_by(|(time_a, path_a), (time_b, path_b)| {
        // Entries with a known mtime come first, then newest before
        // oldest, then lexical path order as the tie-break.
        time_a
            .is_none()
            .cmp(&time_b.is_none())
            .then_with(|| time_b.cmp(time_a))
            .then_with(|| path_a.cmp(path_b))
    });

    for (_, path) in stamped {
        paths.push(path);
    }
}
315}