1use crate::types::{RgCount, RgMatch};
2use anyhow::{anyhow, Result};
3use grep_regex::RegexMatcherBuilder;
4use grep_searcher::{BinaryDetection, Searcher, SearcherBuilder, Sink, SinkMatch};
5use ignore::{WalkBuilder, WalkState};
6use std::path::{Path, PathBuf};
7use std::sync::mpsc::{self, Receiver, Sender};
8use std::sync::{Arc, Mutex};
9
/// Parameters for a single search or count request handled by a [`GrepEngine`].
#[derive(Debug, Clone)]
pub struct GrepEngineInput {
    /// Regular expression handed to the matcher builder.
    pub pattern: String,
    /// Directory (or file) at which the file walk starts; also the base for `glob`.
    pub root: PathBuf,
    /// Optional glob added as a walk override.
    pub glob: Option<String>,
    /// Optional file-type name selected from the default type definitions.
    pub r#type: Option<String>,
    /// Match without regard to letter case.
    pub case_insensitive: bool,
    /// Enable multi-line matching (also lets `.` match a newline).
    pub multiline: bool,
    /// Number of context lines reported before each match.
    pub context_before: usize,
    /// Number of context lines reported after each match.
    pub context_after: usize,
    /// Length limit passed to `decode_line`; longer lines are truncated.
    pub max_columns: usize,
    /// Upper bound on file size given to the walker via `max_filesize`.
    pub max_filesize: u64,
}
27
28pub trait GrepEngine: Send + Sync {
31 fn search(&self, input: &GrepEngineInput) -> Result<Vec<RgMatch>>;
32 fn count(&self, input: &GrepEngineInput) -> Result<Vec<RgCount>>;
33}
34
35pub fn default_engine() -> Box<dyn GrepEngine> {
36 Box::new(RipgrepLibEngine::new())
37}
38
/// [`GrepEngine`] implementation backed by the ripgrep library crates
/// (`grep_regex`, `grep_searcher`, `ignore`). It carries no state.
pub struct RipgrepLibEngine;
47
48impl Default for RipgrepLibEngine {
49 fn default() -> Self {
50 Self::new()
51 }
52}
53
54impl RipgrepLibEngine {
55 pub fn new() -> Self {
56 Self
57 }
58
59 fn build_matcher(
60 &self,
61 input: &GrepEngineInput,
62 ) -> Result<grep_regex::RegexMatcher> {
63 RegexMatcherBuilder::new()
64 .case_insensitive(input.case_insensitive)
65 .multi_line(input.multiline)
66 .dot_matches_new_line(input.multiline)
67 .build(&input.pattern)
68 .map_err(|e| anyhow!(e.to_string()))
69 }
70
71 fn build_walk(&self, input: &GrepEngineInput) -> WalkBuilder {
72 let mut wb = WalkBuilder::new(&input.root);
73 wb.hidden(true) .git_ignore(true)
75 .git_global(true)
76 .git_exclude(true)
77 .ignore(true)
78 .parents(true)
79 .follow_links(false)
80 .max_filesize(Some(input.max_filesize))
81 .require_git(false)
82 .add_custom_ignore_filename(".rgignore");
83
84 if let Some(g) = input.glob.as_deref() {
85 let mut b = ignore::overrides::OverrideBuilder::new(&input.root);
86 let _ = b.add(g);
89 if let Ok(over) = b.build() {
90 wb.overrides(over);
91 }
92 }
93 if let Some(t) = input.r#type.as_deref() {
94 let mut tb = ignore::types::TypesBuilder::new();
95 tb.add_defaults();
96 let _ = tb.select(t);
97 if let Ok(types) = tb.build() {
98 wb.types(types);
99 }
100 }
101 wb
102 }
103
104 fn make_searcher(&self, input: &GrepEngineInput) -> Searcher {
105 let mut sb = SearcherBuilder::new();
106 sb.binary_detection(BinaryDetection::quit(b'\x00'))
107 .multi_line(input.multiline);
108 if input.context_before > 0 {
109 sb.before_context(input.context_before);
110 }
111 if input.context_after > 0 {
112 sb.after_context(input.context_after);
113 }
114 sb.build()
115 }
116}
117
118impl GrepEngine for RipgrepLibEngine {
119 fn search(&self, input: &GrepEngineInput) -> Result<Vec<RgMatch>> {
120 let matcher = self.build_matcher(input)?;
121 let walker = self.build_walk(input).build_parallel();
122 let (tx, rx): (Sender<RgMatch>, Receiver<RgMatch>) = mpsc::channel();
123 let max_cols = input.max_columns;
124
125 let before_ctx = input.context_before;
126 let after_ctx = input.context_after;
127 let multi = input.multiline;
128 walker.run(|| {
129 let matcher = matcher.clone();
130 let tx = tx.clone();
131 Box::new(move |result| {
132 let entry = match result {
133 Ok(e) => e,
134 Err(_) => return WalkState::Continue,
135 };
136 let p = entry.path();
137 if !p.is_file() {
138 return WalkState::Continue;
139 }
140 let mut sb = SearcherBuilder::new();
144 sb.binary_detection(BinaryDetection::quit(b'\x00'))
145 .multi_line(multi);
146 if before_ctx > 0 {
147 sb.before_context(before_ctx);
148 }
149 if after_ctx > 0 {
150 sb.after_context(after_ctx);
151 }
152 let mut searcher = sb.build();
153 let mut sink = VecSink {
154 path: p.to_string_lossy().into_owned(),
155 matches: Vec::new(),
156 max_cols,
157 };
158 let _ = searcher.search_path(&matcher, p, &mut sink);
159 for m in sink.matches {
160 let _ = tx.send(m);
161 }
162 WalkState::Continue
163 })
164 });
165 drop(tx);
166 Ok(rx.into_iter().collect())
167 }
168
169 fn count(&self, input: &GrepEngineInput) -> Result<Vec<RgCount>> {
170 let matcher = self.build_matcher(input)?;
171 let walker = self.build_walk(input).build_parallel();
172 let counts: Arc<Mutex<Vec<RgCount>>> = Arc::new(Mutex::new(Vec::new()));
173 let max_cols = input.max_columns;
174
175 walker.run(|| {
176 let matcher = matcher.clone();
177 let counts = Arc::clone(&counts);
178 Box::new(move |result| {
179 let entry = match result {
180 Ok(e) => e,
181 Err(_) => return WalkState::Continue,
182 };
183 let p = entry.path();
184 if !p.is_file() {
185 return WalkState::Continue;
186 }
187 let mut searcher = SearcherBuilder::new()
188 .binary_detection(BinaryDetection::quit(b'\x00'))
189 .build();
190 let mut sink = CountSink {
191 count: 0,
192 max_cols,
193 };
194 let _ = searcher.search_path(&matcher, p, &mut sink);
195 if sink.count > 0 {
196 let mut g = counts.lock().unwrap();
197 g.push(RgCount {
198 path: p.to_string_lossy().into_owned(),
199 count: sink.count,
200 });
201 }
202 WalkState::Continue
203 })
204 });
205 let mut out = Arc::try_unwrap(counts).unwrap().into_inner().unwrap();
206 out.sort_by(|a, b| a.path.cmp(&b.path));
207 Ok(out)
208 }
209}
210
211struct VecSink {
214 path: String,
215 matches: Vec<RgMatch>,
216 max_cols: usize,
217}
218
219impl Sink for VecSink {
220 type Error = std::io::Error;
221
222 fn matched(
223 &mut self,
224 _searcher: &Searcher,
225 mat: &SinkMatch<'_>,
226 ) -> Result<bool, Self::Error> {
227 let text = decode_line(mat.bytes(), self.max_cols);
228 let line_number = mat.line_number().unwrap_or(0);
229 self.matches.push(RgMatch {
230 path: self.path.clone(),
231 line_number,
232 text,
233 is_context: false,
234 });
235 Ok(true)
236 }
237
238 fn context(
239 &mut self,
240 _searcher: &Searcher,
241 ctx: &grep_searcher::SinkContext<'_>,
242 ) -> Result<bool, Self::Error> {
243 let text = decode_line(ctx.bytes(), self.max_cols);
244 let line_number = ctx.line_number().unwrap_or(0);
245 self.matches.push(RgMatch {
246 path: self.path.clone(),
247 line_number,
248 text,
249 is_context: true,
250 });
251 Ok(true)
252 }
253}
254
/// Sink that only tallies how many matches were reported for one file.
struct CountSink {
    /// Number of `matched` callbacks received so far.
    count: u64,
    /// Carried along from the request; it does not influence the count.
    max_cols: usize,
}
259
260impl Sink for CountSink {
261 type Error = std::io::Error;
262
263 fn matched(
264 &mut self,
265 _searcher: &Searcher,
266 _mat: &SinkMatch<'_>,
267 ) -> Result<bool, Self::Error> {
268 self.count += 1;
269 let _ = self.max_cols; Ok(true)
271 }
272}
273
/// Converts one raw line into display text.
///
/// The bytes are decoded lossily as UTF-8 and trailing `\n` / `\r` characters
/// are removed. If the result is longer than `max_cols` bytes it is cut and a
/// truncation notice is appended.
///
/// The cut never splits a character: when `max_cols` falls inside a multi-byte
/// sequence the cut moves back to the previous character boundary, so the kept
/// prefix may be slightly shorter than `max_cols`. Slicing at the raw byte
/// index would panic on such input.
fn decode_line(bytes: &[u8], max_cols: usize) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    let line = decoded.trim_end_matches(|c| c == '\n' || c == '\r');
    if line.len() <= max_cols {
        return line.to_string();
    }

    // Here `max_cols < line.len()`; index 0 is always a boundary, so the loop
    // terminates without underflow.
    let mut cut = max_cols;
    while !line.is_char_boundary(cut) {
        cut -= 1;
    }
    format!(
        "{}... (line truncated to {} chars)",
        &line[..cut],
        max_cols
    )
}
287
288pub fn compile_probe(pattern: &str) -> Result<(), String> {
292 match RegexMatcherBuilder::new().build(pattern) {
293 Ok(_) => Ok(()),
294 Err(e) => Err(e.to_string()),
295 }
296}
297
/// Reorders `paths` in place, most recently modified first.
///
/// Paths whose modification time cannot be read come after all paths that
/// have one. Ties, and the paths without a time, are ordered by path string.
pub fn sort_paths_by_mtime(paths: &mut Vec<String>) {
    // Look up each modification time once, before sorting.
    let mut stamped: Vec<(Option<std::time::SystemTime>, String)> = std::mem::take(paths)
        .into_iter()
        .map(|path| {
            let modified = std::fs::metadata(&path)
                .and_then(|meta| meta.modified())
                .ok();
            (modified, path)
        })
        .collect();

    stamped.sort_by(|(lhs_time, lhs_path), (rhs_time, rhs_path)| {
        // `false < true` puts entries with a timestamp first; `Reverse` makes
        // newer timestamps sort before older ones.
        let lhs_key = (lhs_time.is_none(), std::cmp::Reverse(*lhs_time));
        let rhs_key = (rhs_time.is_none(), std::cmp::Reverse(*rhs_time));
        lhs_key.cmp(&rhs_key).then_with(|| lhs_path.cmp(rhs_path))
    });

    paths.extend(stamped.into_iter().map(|(_, path)| path));
}