Skip to main content

mcp_methods/grep/
mod.rs

1//! ripgrep-powered file + line search.
2//!
3//! Pure Rust — Python bindings are in `mcp-methods-py`. Two entry
4//! points:
5//! - [`ripgrep_files`] walks a directory tree and searches every file
6//!   matching the glob/type filter. Optional `transform` callback runs
7//!   per-file before search (the Python wrapper bridges a callable into
8//!   `&dyn Fn(&str) -> String`).
9//! - [`ripgrep_lines`] greps through a `Vec<String>` of lines with
10//!   context-window merging. Returns structured matches.
11
12mod searcher;
13mod types;
14mod walker;
15
16use std::path::PathBuf;
17
18use types::{FileMatch, OutputMode};
19
/// Result of [`ripgrep_lines`] — one entry per merged context window.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare groups by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RipgrepLinesGroup {
    /// 1-indexed line numbers of every matching line inside this window,
    /// in ascending order.
    pub lines: Vec<usize>,
    /// 1-indexed first line of the window (inclusive).
    pub context_start: usize,
    /// 1-indexed last line of the window (inclusive).
    pub context_end: usize,
    /// Lines `context_start..=context_end` joined with `'\n'`.
    pub content: String,
}
32
/// Tunable options accepted by [`ripgrep_files`].
///
/// Note that the derived `Default` leaves every flag `false`; use
/// [`RipgrepFilesOpts::new`] to get line numbers and `.gitignore` handling
/// switched on.
#[derive(Default)]
pub struct RipgrepFilesOpts<'a> {
    /// File-name glob; `"*"` is used when this is `None`.
    pub glob: Option<&'a str>,
    /// File-type filter (`"py"`, `"rust"`, …).
    pub type_filter: Option<&'a str>,
    /// One of `"content"` (used when `None`), `"files_with_matches"`, `"count"`.
    pub output_mode: Option<&'a str>,
    /// Forwarded to the matcher builder as its case-insensitivity flag.
    pub case_insensitive: bool,
    /// Forwarded to both the matcher builder and the searcher builder.
    pub multiline: bool,
    /// Context lines before a match; `0` falls back to `context`.
    pub context_before: usize,
    /// Context lines after a match; `0` falls back to `context`.
    pub context_after: usize,
    /// Symmetric context, used for whichever of `context_before` /
    /// `context_after` is left at `0`.
    pub context: usize,
    /// Whether content-mode output carries line numbers.
    pub line_numbers: bool,
    /// Upper bound on output entries kept once `offset` has been applied.
    pub max_results: Option<usize>,
    /// Number of leading output entries to drop.
    pub offset: usize,
    /// Cap on the total number of matches; the output notes when it is hit.
    pub match_limit: Option<usize>,
    /// Directories to skip; forwarded to the walker unchanged.
    pub skip_dirs: Option<&'a [String]>,
    /// Base path handed to the path relativizer for reported file names.
    pub relative_to: Option<&'a str>,
    /// Forwarded to the walker as its `.gitignore` switch.
    pub respect_gitignore: bool,
    /// Hook that rewrites each file's raw text before it is searched. The
    /// Python wrapper uses it to bridge a Python callable; pure-Rust callers
    /// usually keep it `None`.
    pub transform: Option<&'a dyn Fn(&str) -> String>,
}

impl<'a> RipgrepFilesOpts<'a> {
    /// Options with the usual defaults: line numbers on, `.gitignore`
    /// respected, everything else as produced by `Default`.
    pub fn new() -> Self {
        Self {
            respect_gitignore: true,
            line_numbers: true,
            ..Self::default()
        }
    }
}
71
72/// Search for a regex pattern across files using ripgrep's engine.
73///
74/// Uses grep-searcher (mmap, SIMD, binary detection), grep-regex (literal
75/// optimization), and ignore (parallel walk, .gitignore support).
76pub fn ripgrep_files(source_dirs: &[String], pattern: &str, opts: &RipgrepFilesOpts) -> String {
77    let glob = opts.glob.unwrap_or("*");
78    let output_mode = opts.output_mode.unwrap_or("content");
79
80    let mode = match OutputMode::from_str(output_mode) {
81        Ok(m) => m,
82        Err(e) => return e,
83    };
84
85    let matcher = match searcher::build_matcher(pattern, opts.case_insensitive, opts.multiline) {
86        Ok(m) => m,
87        Err(e) => return e,
88    };
89
90    let ctx_before = if opts.context_before > 0 {
91        opts.context_before
92    } else {
93        opts.context
94    };
95    let ctx_after = if opts.context_after > 0 {
96        opts.context_after
97    } else {
98        opts.context
99    };
100
101    let rel_base = opts.relative_to.map(PathBuf::from);
102
103    let file_matches: Vec<FileMatch> = if let Some(transform) = opts.transform {
104        // Sequential path: caller-supplied transform runs per-file before search.
105        let paths = match walker::walk_sequential(
106            source_dirs,
107            glob,
108            opts.type_filter,
109            opts.skip_dirs,
110            opts.respect_gitignore,
111        ) {
112            Ok(p) => p,
113            Err(e) => return e,
114        };
115        let mut matches = Vec::new();
116        let mut total = 0;
117        let has_context = ctx_before > 0 || ctx_after > 0;
118        let mut text_searcher =
119            searcher::build_searcher(ctx_before, ctx_after, opts.multiline, false);
120        let mut sink = searcher::CollectSink::new(has_context);
121
122        for path in &paths {
123            let raw = match std::fs::read_to_string(path) {
124                Ok(t) => t,
125                Err(_) => continue,
126            };
127            let text = transform(&raw);
128
129            sink.clear();
130            if let Some((line_matches, context_lines)) =
131                searcher::search_text(&text, &matcher, &mut text_searcher, &mut sink)
132            {
133                total += line_matches.len();
134                matches.push(FileMatch {
135                    path: path.clone(),
136                    match_count: line_matches.len(),
137                    line_matches,
138                    context_lines,
139                });
140                if let Some(cap) = opts.match_limit {
141                    if total >= cap {
142                        break;
143                    }
144                }
145            }
146        }
147        matches
148    } else {
149        // Parallel path: walk + search in parallel walker threads.
150        match walker::walk_and_search_parallel(
151            source_dirs,
152            glob,
153            opts.type_filter,
154            opts.skip_dirs,
155            opts.respect_gitignore,
156            &matcher,
157            ctx_before,
158            ctx_after,
159            opts.multiline,
160            opts.match_limit.unwrap_or(0),
161        ) {
162            Ok(m) => m,
163            Err(e) => return e,
164        }
165    };
166
167    let source_path = PathBuf::from(&source_dirs[0]);
168    format_output(
169        &file_matches,
170        pattern,
171        mode,
172        opts.line_numbers,
173        opts.max_results,
174        opts.offset,
175        opts.match_limit,
176        rel_base.as_deref(),
177        &source_path,
178        glob,
179    )
180}
181
182/// Grep through `text_lines` with context-window merging.
183/// Returns one [`RipgrepLinesGroup`] per merged window.
184pub fn ripgrep_lines(
185    text_lines: &[String],
186    pattern: &str,
187    context: usize,
188) -> Result<Vec<RipgrepLinesGroup>, String> {
189    let regex = regex::Regex::new(pattern).map_err(|e| format!("Invalid regex: {}", e))?;
190
191    let mut raw: Vec<(usize, usize, usize)> = Vec::new();
192    for (idx, line) in text_lines.iter().enumerate() {
193        if regex.is_match(line) {
194            let start = idx.saturating_sub(context);
195            let end = (idx + context + 1).min(text_lines.len());
196            raw.push((idx + 1, start, end));
197        }
198    }
199
200    // Merge overlapping windows
201    struct Group {
202        lines: Vec<usize>,
203        start: usize,
204        end: usize,
205    }
206    let mut groups: Vec<Group> = Vec::new();
207    for (hit_line, start, end) in raw {
208        if let Some(last) = groups.last_mut() {
209            if start <= last.end {
210                last.lines.push(hit_line);
211                last.end = last.end.max(end);
212                continue;
213            }
214        }
215        groups.push(Group {
216            lines: vec![hit_line],
217            start,
218            end,
219        });
220    }
221
222    Ok(groups
223        .into_iter()
224        .map(|g| {
225            let content = text_lines[g.start..g.end].join("\n");
226            RipgrepLinesGroup {
227                lines: g.lines,
228                context_start: g.start + 1,
229                context_end: g.end,
230                content,
231            }
232        })
233        .collect())
234}
235
236// ---------------------------------------------------------------------------
237// Output formatting
238// ---------------------------------------------------------------------------
239
240#[allow(clippy::too_many_arguments)]
241fn format_output(
242    file_matches: &[FileMatch],
243    pattern: &str,
244    mode: OutputMode,
245    line_numbers: bool,
246    max_results: Option<usize>,
247    offset: usize,
248    match_limit: Option<usize>,
249    relative_to: Option<&std::path::Path>,
250    source_path: &std::path::Path,
251    glob: &str,
252) -> String {
253    match mode {
254        OutputMode::Content => format_content(
255            file_matches,
256            pattern,
257            line_numbers,
258            max_results,
259            offset,
260            match_limit,
261            relative_to,
262            source_path,
263            glob,
264        ),
265        OutputMode::FilesWithMatches => format_files(
266            file_matches,
267            max_results,
268            offset,
269            match_limit,
270            relative_to,
271            source_path,
272        ),
273        OutputMode::Count => format_count(
274            file_matches,
275            max_results,
276            offset,
277            match_limit,
278            relative_to,
279            source_path,
280        ),
281    }
282}
283
284#[allow(clippy::too_many_arguments)]
285fn format_content(
286    file_matches: &[FileMatch],
287    pattern: &str,
288    line_numbers: bool,
289    max_results: Option<usize>,
290    offset: usize,
291    match_limit: Option<usize>,
292    relative_to: Option<&std::path::Path>,
293    source_path: &std::path::Path,
294    glob: &str,
295) -> String {
296    let estimated: usize = file_matches
297        .iter()
298        .map(|fm| fm.line_matches.len() + fm.context_lines.len())
299        .sum();
300    let mut lines: Vec<String> = Vec::with_capacity(estimated);
301
302    for fm in file_matches {
303        let rel = walker::relativize(&fm.path, relative_to, source_path);
304
305        if fm.context_lines.is_empty() {
306            for lm in &fm.line_matches {
307                if line_numbers {
308                    lines.push(format!(
309                        "  {}:{}:{} {}",
310                        rel, lm.line_number, ':', lm.content
311                    ));
312                } else {
313                    lines.push(format!("  {}  {}", rel, lm.content));
314                }
315            }
316        } else {
317            let matches = &fm.line_matches;
318            let contexts = &fm.context_lines;
319            let mut mi = 0;
320            let mut ci = 0;
321            let mut prev_ln: Option<u64> = None;
322
323            while mi < matches.len() || ci < contexts.len() {
324                let (ln, content, is_match) = match (matches.get(mi), contexts.get(ci)) {
325                    (Some(m), Some((cln, _))) if m.line_number <= *cln => {
326                        if *cln == m.line_number {
327                            ci += 1;
328                        }
329                        mi += 1;
330                        (m.line_number, m.content.as_str(), true)
331                    }
332                    (Some(_), Some((cln, cc))) => {
333                        ci += 1;
334                        (*cln, cc.as_str(), false)
335                    }
336                    (Some(m), None) => {
337                        mi += 1;
338                        (m.line_number, m.content.as_str(), true)
339                    }
340                    (None, Some((cln, cc))) => {
341                        ci += 1;
342                        (*cln, cc.as_str(), false)
343                    }
344                    (None, None) => unreachable!(),
345                };
346
347                if let Some(prev) = prev_ln {
348                    if ln > prev + 1 {
349                        lines.push("--".to_string());
350                    }
351                }
352                prev_ln = Some(ln);
353
354                if line_numbers {
355                    let sep = if is_match { ':' } else { '-' };
356                    lines.push(format!("  {}:{}{} {}", rel, ln, sep, content));
357                } else {
358                    lines.push(format!("  {}  {}", rel, content));
359                }
360            }
361        }
362    }
363
364    if offset > 0 && offset < lines.len() {
365        lines.drain(..offset);
366    } else if offset >= lines.len() && !lines.is_empty() {
367        lines.clear();
368    }
369    if let Some(limit) = max_results {
370        if lines.len() > limit {
371            lines.truncate(limit);
372        }
373    }
374
375    if lines.is_empty() {
376        return format!("No matches for '{}' in {} files.", pattern, glob);
377    }
378
379    let total_matches: usize = file_matches.iter().map(|fm| fm.match_count).sum();
380    let mut header = format!("Found {} match(es) for '{}'", total_matches, pattern);
381    if let Some(cap) = match_limit {
382        if total_matches >= cap {
383            header.push_str(&format!(" (capped at {})", cap));
384        }
385    }
386    header.push(':');
387
388    format!("{}\n{}", header, lines.join("\n"))
389}
390
391fn format_files(
392    file_matches: &[FileMatch],
393    max_results: Option<usize>,
394    offset: usize,
395    match_limit: Option<usize>,
396    relative_to: Option<&std::path::Path>,
397    source_path: &std::path::Path,
398) -> String {
399    let mut paths: Vec<String> = file_matches
400        .iter()
401        .map(|fm| walker::relativize(&fm.path, relative_to, source_path))
402        .collect();
403
404    if offset > 0 && offset < paths.len() {
405        paths.drain(..offset);
406    } else if offset >= paths.len() && !paths.is_empty() {
407        paths.clear();
408    }
409    if let Some(limit) = max_results {
410        if paths.len() > limit {
411            paths.truncate(limit);
412        }
413    }
414
415    if paths.is_empty() {
416        return "No matching files.".to_string();
417    }
418
419    let mut result = paths.join("\n");
420    if let Some(cap) = match_limit {
421        let total_matches: usize = file_matches.iter().map(|fm| fm.match_count).sum();
422        if total_matches >= cap {
423            result.push_str(&format!(
424                "\n\n(results may be incomplete — hit {} match limit across {} files)",
425                cap,
426                file_matches.len()
427            ));
428        }
429    }
430    result
431}
432
433fn format_count(
434    file_matches: &[FileMatch],
435    max_results: Option<usize>,
436    offset: usize,
437    match_limit: Option<usize>,
438    relative_to: Option<&std::path::Path>,
439    source_path: &std::path::Path,
440) -> String {
441    let mut entries: Vec<String> = file_matches
442        .iter()
443        .map(|fm| {
444            let rel = walker::relativize(&fm.path, relative_to, source_path);
445            format!("{}:{}", rel, fm.match_count)
446        })
447        .collect();
448
449    if offset > 0 && offset < entries.len() {
450        entries.drain(..offset);
451    } else if offset >= entries.len() && !entries.is_empty() {
452        entries.clear();
453    }
454    if let Some(limit) = max_results {
455        if entries.len() > limit {
456            entries.truncate(limit);
457        }
458    }
459
460    if entries.is_empty() {
461        return "No matching files.".to_string();
462    }
463
464    let mut result = entries.join("\n");
465    if let Some(cap) = match_limit {
466        let total_matches: usize = file_matches.iter().map(|fm| fm.match_count).sum();
467        if total_matches >= cap {
468            result.push_str(&format!(
469                "\n\n(results may be incomplete — hit {} match limit across {} files)",
470                cap,
471                file_matches.len()
472            ));
473        }
474    }
475    result
476}